# Bootstrap step: try to load each package and install it only when loading
# fails. Note that `require()` also attaches every package that is already
# installed, so later code may rely on these being on the search path.

# Installers used by the checks below
if (!require("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!require("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
if (!require("pacman", quietly = TRUE)) {
  install.packages("pacman")
}

# Analysis packages (CRAN, Bioconductor or GitHub sources)
if (!require("gtsummary", quietly = TRUE)) {
  remotes::install_github("ddsjoberg/gtsummary")
}
if (!require("mixOmics", quietly = TRUE)) {
  BiocManager::install("mixOmicsTeam/mixOmics")
}
if (!require("yingtools2", quietly = TRUE)) {
  remotes::install_github("ying14/yingtools2")
}
if (!require("ggpirate", quietly = TRUE)) {
  remotes::install_github("mikabr/ggpirate")
}
if (!require("paletteer", quietly = TRUE)) {
  remotes::install_github("EmilHvitfeldt/paletteer")
}
if (!require("microbiomeMarker", quietly = TRUE)) {
  BiocManager::install("microbiomeMarker")
}
if (!require("magick", quietly = TRUE)) {
  install.packages("magick")
}
if (!require("mltest", quietly = TRUE)) {
  install.packages("mltest")
}
if (!require("biostatUtil", quietly = TRUE)) {
  remotes::install_github("talhouklab/biostatUtil")
}
# Attach the analysis packages with pacman. `install = FALSE` is passed, so
# p_load() is not asked to install anything that is missing here.
library(pacman)
p_load(
factoextra, # Dimension reduction
FactoMineR, # Dimension reduction
stabs, # Stability variable selection
mboost, # Gradient boosting for model building
ggrepel, # Visualization, repels labels on plots
bestglm, # Logistic regression
knitr, # To change R markdown PDF options
caret, # To calculate model parameters
cutpointr, # Calculate cutpoints
RPostgreSQL, # Connect to PostgreSQL server
phyloseq, # Phyloseq data wrangling
# microbiomeMarker, # LEfSe analysis
ComplexHeatmap, # Heatmap generator
paletteer, # Extensive color palette
tidyverse, # Data wrangling and visualization
circlize, # Color ramp builder
rstatix, # Tidyverse statistics
EnhancedVolcano, # Volcano plot analysis
umap, # Uniform Manifold Approximation Projection
ggpubr, # Simple ggplots with stats
ggpirate, # ggplot version of Pirate plot
mixOmics, # PLS-DA
conflicted, # Forces conflicted packages to be used by preferred package
gridExtra, # Arrange multiple grobs
reticulate, # Run python in R
tableone, # Additional clinical tables support
lubridate, # Manipulate date objects
yingtools2, # Custom plotting functions
cowplot, # Combine plots together
epiR, # Obtain model performance measures
pROC, # Produce ROC curves
plotrix, # Add table to base R plot
glmnet, # LASSO regression
forcats, # Factor reordering
ggpmisc, # Miscellaneous ggplot helper functions
doParallel, # Parallelize functions (for optimizing cutpoints)
PRISMAstatement, # Flow table generator
DiagrammeRsvg, # Flow table visualizer
rsvg, # Flow table visualizer
scales, # Scale functions for visualizations
devtools, # To load packages from GitHub
# datscience, # Bibliography manager
formatR, # Markdown code formatter
openxlsx, # Excel worksheet manipulator
biostatUtil, # Mainly used for multi-level confusion matrix confidence intervals
irr, # Dependency package
rJava, # Dependency package
install = FALSE
)
library("gtsummary") # Clinical tables, pacman had trouble loading this in, so had to go the manual route
# Tell `conflicted` which package wins for each ambiguous function name:
# dplyr for the data verbs, ggplot2 for annotate().
invisible(lapply(
  c("select", "mutate", "filter", "rename", "slice", "between"),
  conflict_prefer,
  winner = "dplyr"
))
conflict_prefer("annotate", "ggplot2")
# Source getRdpPal.R directly from GitHub (needs network access at run time)
devtools::source_url("https://github.com/yingeddi2008/DFIutility/blob/master/getRdpPal.R?raw=TRUE")
# ggplot theme shortcuts
et <- element_text
eb <- element_blank
er <- element_rect
el <- element_line
# knitr chunk options: tidy the echoed code and wrap it at 60 characters
opts_chunk$set(tidy.opts = list(width.cutoff = 60), tidy = TRUE)
# "not in" operator: the logical negation of %in%
`%!in%` <- negate(`%in%`)
# Running python in R
use_condaenv("base", required = TRUE)
# 1) Load R image
load("./Data/LT_Modeling.RData")
# # OR # #
# 2) Individually Load R Objects
#
# # Sample lookup
# sample_lookup <- readRDS('./Data/sample_lookup.rds')
# # First samples from all patients
# first_samps <- readRDS('./Data/first_samps_anon.rds')
# # Simplified dataframe containing infection information and relative
# # abundance of targeted taxa
# peri_matrix_all <- readRDS('./Data/peri_matrix_clin_all.rds')
# # Distinct detailed infection data
# peri_criteria_best <- readRDS('./Data/peri_criteria_best_anon.rds')
# # All detailed infection data
# peri_criteria_all <- readRDS('./Data/peri_criteria_all_anon.rds')
# # NCBI taxonomy lookup
# tax_lookup <- readRDS('./Data/tax_lookup.rds')
# # Complete metaphlan dataframe for all patients and healthy donors
# metaphlan_df <- readRDS('./Data/metaphlan_anon.rds')
# # Metaphlan dataframe of patient samples
# metaphlan_peri_anon <- readRDS('./Data/metaphlan_peri_anon.rds')
# # Custom color palette
# metaphlan_pal <- getRdpPal(metaphlan_df)
# # Qualitative metabolomics
# metab_qual_anon <- readRDS('./Data/metab_qual_anonym.rds')
# # Quantitative metabolomics
# metab_quant_anon <- readRDS('./Data/metab_quant_anonym.rds')
# # Antibiotics data
# abx <- readRDS('./Data/abx_anon.rds')
# abx <- readRDS('./Data/original_cohort_abx.rds')
# # Demographics data
# demo <- readRDS('./Data/demo_anon.rds')
# # Bile acid gene data
# ba_genes <- readRDS('./Data/bile_acid_genes.rds')
# # CARD genes
# card_dict <- readRDS('./Data/card_dict.rds')
# card_dict <- card_dict %>%
#   filter(dbsource == 'card') %>%
#   mutate(
#     AMRGeneFamily = ifelse(
#       AMRGeneFamily == 'glycopeptide resistance gene cluster;van ligase',
#       'glycopeptide resistance gene cluster;vanA',
#       AMRGeneFamily
#     ),
#     AMRGeneFamily = gsub(pattern = ';', replacement = '\n', x = AMRGeneFamily)
#   )
# # CARD data
# card2 <- readRDS('./Data/card.rds')
# Custom function to avoid any errors during map
# `safe_cutpointr()` wraps `cutpointr()` so that a failing call returns the
# string "Error" instead of aborting the surrounding iteration.
safe_cutpointr <- possibly(.f = cutpointr, otherwise = "Error")
# Calculate the number of cores: two fewer than detected, but never below one.
# `detectCores()` can return NA and small machines may report <= 2 cores; an
# NA or non-positive worker count would make makePSOCKcluster() fail.
no_cores <- max(1L, detectCores() - 2L, na.rm = TRUE)
# create the cluster for caret to use
cl <- makePSOCKcluster(no_cores)
registerDoParallel(cl)
# Seed the random number generator
set.seed(123456)
# Fit one cutpointr model for every (infection type, input variable) pair.
# The result is a list with one element per group, in group_by() order.
test_abundance <- peri_matrix_all %>%
  mutate(sampleID = as.factor(sampleID)) %>%
  select(
    starts_with(c("entero", "esch", "klebs", "citro", "rahn", "proteus")),
    -ends_with("abs_abundance")
  ) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  # Binary outcomes: 1 when the summed organism columns reach at least 1
  mutate(
    enterococcus_infection = ifelse(
      `Enterococcus faecium` + `Enterococcus faecalis` +
        `Enterococcus avium` >= 1,
      1, 0
    ),
    enterobacterales_infection = ifelse(
      `Escherichia coli` + `Klebsiella pneumoniae` +
        `Citrobacter freundii` + `Proteus mirabilis` >= 1,
      1, 0
    )
  ) %>%
  # The per-organism columns are no longer needed once the outcomes exist
  select(-c(
    `Enterococcus faecium`, `Enterococcus faecalis`, `Enterococcus avium`,
    `Escherichia coli`, `Klebsiella pneumoniae`, `Citrobacter freundii`,
    `Proteus mirabilis`
  )) %>%
  # Long format: one row per input variable ...
  pivot_longer(
    -c(enterococcus_infection, enterobacterales_infection),
    names_to = "variable",
    values_to = "value"
  ) %>%
  # ... crossed with one row per infection outcome
  pivot_longer(
    -c(variable, value),
    names_to = "infection_type",
    values_to = "infection"
  ) %>%
  mutate(
    variable = paste0("Input: ", variable, "\nPredict: ", infection_type)
  ) %>%
  group_by(infection_type, variable) %>%
  group_map(
    ~ safe_cutpointr(
      ., value, infection, variable,
      method = maximize_metric, metric = youden, pos_class = 1,
      boot_runs = 100, allowParallel = TRUE, na.rm = TRUE
    ),
    .keep = TRUE
  )
# to get cutpoint object
test_abundance[4][[1]] # ecoc rel abd, ecoc infection
# Bind the per-group cutpointr results into a single tibble. The assignment
# must start on its own line: when it trails the comment above, R never sees
# it and `test_abundance_unnest` is not created.
test_abundance_unnest <- test_abundance %>%
  map_df(as_tibble)
# obtain threshold cutpoints for relative abundance
# NOTE(review): later code reads this table by row position
# (`optimal_cutpoint[1]` for Enterobacterales, `[2]` for Enterococcus), so the
# row order produced here matters.
optimal_cutpoint_rel <- test_abundance_unnest %>%
  separate(
    subgroup,
    into = c("Input", "Predict"),
    sep = "\n",
    remove = FALSE
  ) %>%
  # keep models whose input and predicted outcome name the same organism group
  filter(
    (grepl("enterobacterales", Input) & grepl("enterobacterales", Predict)) |
      (grepl("enterococcus", Input) & grepl("enterococcus", Predict))
  ) %>%
  group_by(pos_class) %>%
  filter(grepl("rel", subgroup)) %>%
  select(subgroup, optimal_cutpoint)
# Expansions and Infections Stats
# Count samples above/below the optimal relative-abundance cutpoints, split by
# infection status, for Enterococcus (cutpoint row 2) and Enterobacterales
# (cutpoint row 1). Output: one row per metric with `count` and `percent`.
# Every sum uses `na.rm = TRUE` so that a missing abundance cannot turn the
# two "*_expan_above_cutpoint" totals into NA while the other counts stay
# numeric.
expan_infx_stats <- peri_matrix_all %>%
  left_join(
    peri_matrix_all %>%
      select(sampleID, ends_with("infection")) %>%
      ungroup()
  ) %>%
  mutate(
    ecoc_infx = enterococcus_infection,
    ecoc_infx = ifelse(ecoc_infx >= 1, 1, 0),
    ebac_infx = enterobacterales_infection,
    ebac_infx = ifelse(ebac_infx >= 1, 1, 0)
  ) %>%
  select(
    enterococcus_rel_abundance, enterobacterales_rel_abundance,
    ecoc_infx, ebac_infx
  ) %>%
  summarise(
    ecoc_expan_above_cutpoint = sum(
      enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2],
      na.rm = TRUE
    ),
    ecoc_infx_ecoc_below_cutpoint = sum(
      ecoc_infx == 1 &
        enterococcus_rel_abundance < optimal_cutpoint_rel$optimal_cutpoint[2],
      na.rm = TRUE
    ),
    ecoc_infx_ecoc_above_cutpoint = sum(
      ecoc_infx == 1 &
        enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2],
      na.rm = TRUE
    ),
    ecoc_noinfx_ecoc_below_cutpoint = sum(
      ecoc_infx == 0 &
        enterococcus_rel_abundance < optimal_cutpoint_rel$optimal_cutpoint[2],
      na.rm = TRUE
    ),
    ecoc_noinfx_ecoc_above_cutpoint = sum(
      ecoc_infx == 0 &
        enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2],
      na.rm = TRUE
    ),
    ebac_expan_above_cutpoint = sum(
      enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1],
      na.rm = TRUE
    ),
    ebac_infx_ebac_below_cutpoint = sum(
      ebac_infx == 1 &
        enterobacterales_rel_abundance < optimal_cutpoint_rel$optimal_cutpoint[1],
      na.rm = TRUE
    ),
    ebac_infx_ebac_above_cutpoint = sum(
      ebac_infx == 1 &
        enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1],
      na.rm = TRUE
    ),
    ebac_noinfx_ebac_below_cutpoint = sum(
      ebac_infx == 0 &
        enterobacterales_rel_abundance < optimal_cutpoint_rel$optimal_cutpoint[1],
      na.rm = TRUE
    ),
    ebac_noinfx_ebac_above_cutpoint = sum(
      ebac_infx == 0 &
        enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1],
      na.rm = TRUE
    )
  ) %>%
  rownames_to_column(var = "rowname") %>%
  pivot_longer(-rowname, names_to = "metric", values_to = "count") %>%
  # NOTE(review): 107 is hard-coded; presumably the cohort size - confirm.
  mutate(percent = round((count / 107) * 100, 2)) %>%
  select(-rowname)
# Pull the four Enterococcus infection-by-expansion counts into one wide row
ecoc_infx_confusion <- expan_infx_stats %>%
  filter(metric %in% c(
    "ecoc_infx_ecoc_below_cutpoint", "ecoc_infx_ecoc_above_cutpoint",
    "ecoc_noinfx_ecoc_below_cutpoint", "ecoc_noinfx_ecoc_above_cutpoint"
  )) %>%
  select(metric, count) %>%
  pivot_wider(names_from = metric, values_from = count)
# 2x2 table: rows = infection status, columns = expansion status.
# data.frame() turns the `No Expansion` name into `No.Expansion`.
ecoc_infx_confusion_cnfs <- data.frame(
  Enterococcus = c("Infection", "No Infection"),
  Expansion = c(
    ecoc_infx_confusion$ecoc_infx_ecoc_above_cutpoint,
    ecoc_infx_confusion$ecoc_noinfx_ecoc_above_cutpoint
  ),
  `No Expansion` = c(
    ecoc_infx_confusion$ecoc_infx_ecoc_below_cutpoint,
    ecoc_infx_confusion$ecoc_noinfx_ecoc_below_cutpoint
  )
)
# Summary statistics from the table (row 1 = Infection, row 2 = No Infection);
# each scalar is recycled onto both rows.
ecoc_infx_confusion_cnfs <- ecoc_infx_confusion_cnfs %>%
  mutate(
    sensitivity = round(
      Expansion[1] / (Expansion[1] + No.Expansion[1]), 3
    ),
    specificity = round(
      No.Expansion[2] / (No.Expansion[2] + Expansion[2]), 3
    ),
    odds_ratio = round(
      (Expansion[1] / No.Expansion[1]) / (Expansion[2] / No.Expansion[2]), 3
    )
  )
# fig_5b: ROC curve (fpr vs tpr) for the Enterococcus relative-abundance model,
# annotated with text labels for AUC, accuracy, specificity, sensitivity, the
# optimal cutpoint and the odds ratio, plus an inset table of the
# infection-by-expansion counts taken from `ecoc_infx_confusion_cnfs`.
fig_5b <- test_abundance_unnest %>%
separate(subgroup, into = c("Input", "Predict"), sep = "\n",
remove = F) %>%
# keep models whose input and predicted outcome name the same organism group
filter(grepl("enterobacterales", Input) & grepl("enterobacterales",
Predict) | grepl("enterococcus", Input) & grepl("enterococcus",
Predict)) %>%
group_by(pos_class) %>%
ungroup() %>%
unnest(roc_curve) %>%
arrange(desc(AUC)) %>%
select(-boot) %>%
# Build the annotation labels. Each label is the point estimate followed by
# two bracketed values read from rows 1 and 2 of the third column of
# boot_ci(..., alpha = 0.05) - presumably the lower and upper bounds; confirm.
# The fitted objects are looked up by position: test_abundance[4][[1]] for the
# Enterococcus model and test_abundance[1][[1]] for the Enterobacterales model.
mutate(auc_label = paste0("AUC = ", formatC(round(AUC, 3),
digits = 2, format = "f")), auc_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(auc_label, " [", formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
AUC, alpha = 0.05)[3][1, ], 2)), digits = 2, format = "f"),
", ", formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
AUC, alpha = 0.05)[3][2, ], 2)), digits = 2, format = "f"),
"]"), grepl("enterobacterales_rel_abundance", Input) ~
paste0(auc_label, " [", formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
AUC, alpha = 0.05)[3][1, ], 2)), digits = 2, format = "f"),
", ", formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
AUC, alpha = 0.05)[3][2, ], 2)), digits = 2,
format = "f"), "]")), acc_label = paste0("Accuracy = ",
formatC(round(acc, 3) * 100, digits = 0, format = "f"),
"%"), acc_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(acc_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
acc, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0, format = "f"),
"%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
acc, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0, format = "f"),
"%"), "]"), grepl("enterobacterales_rel_abundance", Input) ~
paste0(acc_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
acc, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
acc, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]")), sens_label = paste0("Sensitivity = ",
formatC(round(sensitivity, 3) * 100, digits = 0, format = "f"),
"%"), sens_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(sens_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
sensitivity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
sensitivity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]"), grepl("enterobacterales_rel_abundance",
Input) ~ paste0(sens_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
sensitivity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
sensitivity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]")), spec_label = paste0("Specificity = ",
formatC(round(specificity, 3) * 100, digits = 0, format = "f"),
"%"), spec_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(spec_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
specificity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
specificity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]"), grepl("enterobacterales_rel_abundance",
Input) ~ paste0(spec_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
specificity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
specificity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]"))) %>%
# restrict to models whose input is a relative abundance
filter(grepl(pattern = "rel_abundance", x = Input)) %>%
# tidy the facet label and attach the odds ratio from the confusion table
mutate(subgroup = str_to_title(subgroup), subgroup = gsub(pattern = "_",
replacement = " ", x = subgroup), subgroup = gsub(pattern = "rel abundance",
replacement = "Relative Abundance (%)", x = subgroup),
subgroup2 = ifelse(grepl(x = subgroup, pattern = "enterococcus",
ignore.case = TRUE), "Enterococcus", "Enterobacterales"),
subgroup2 = factor(subgroup2, levels = c("Enterococcus",
"Enterobacterales")), odds_label = paste0("OR = ",
ecoc_infx_confusion_cnfs$odds_ratio[1])) %>%
# this figure shows the Enterococcus model only
filter(subgroup2 == "Enterococcus") %>%
ggplot(aes(x = fpr, y = tpr, color = subgroup2)) + geom_line(size = 1.2) +
geom_text(aes(label = auc_label, x = 0.45, y = 0.16), show.legend = F,
hjust = 0, size = 6) + geom_text(aes(label = acc_label,
x = 0.45, y = 0.12), show.legend = F, hjust = 0, size = 6) +
geom_text(aes(label = spec_label, x = 0.45, y = 0.08), show.legend = F,
hjust = 0, size = 6) + geom_text(aes(label = sens_label,
x = 0.45, y = 0.04), show.legend = F, hjust = 0, size = 6) +
geom_text(aes(label = paste0("Cutpoint = ", round((optimal_cutpoint),
digits = 3) * 100, "%"), x = 0.45, y = 0.2), hjust = 0,
size = 6, show.legend = F) + geom_text(aes(label = odds_label,
x = 0.45, y = 0), show.legend = F, hjust = 0, size = 6) +
theme_bw() + theme(axis.text = et(color = "black", size = 12),
axis.title = et(color = "black", size = 14), legend.text = et(size = 12),
legend.title = et(size = 14), legend.spacing.y = unit(0.5,
"cm"), legend.position = "none", panel.grid = eb(), strip.text = et(size = 18,
color = "#2dc46b", face = "bold"), strip.background = eb(),
panel.border = eb(), panel.spacing.y = unit(20, "mm"), axis.line.x = el(color = "black")) +
geom_vline(xintercept = -0.05) + geom_hline(yintercept = -0.05) +
# inset table: Expansion / No Expansion counts per infection status plus a
# row total
geom_table_npc(data = ecoc_infx_confusion_cnfs %>%
column_to_rownames(var = "Enterococcus") %>%
select(Expansion, `No Expansion` = No.Expansion) %>%
mutate(Total = rowSums(.)), label = list(ecoc_infx_confusion_cnfs %>%
column_to_rownames(var = "Enterococcus") %>%
select(Expansion, `No Expansion` = No.Expansion) %>%
mutate(Total = rowSums(.))), npcx = 0.15, npcy = 0.6,
hjust = 0, vjust = 1, table.rownames = TRUE, table.theme = ttheme_minimal(base_size = 16,
core = list(bg_params = list(col = "#2dc46b"), fg_params = list(col = "#2dc46b")),
colhead = list(fg_params = list(col = "#2dc46b",
fontface = "bold")), rowhead = list(fg_params = list(col = "#2dc46b",
fontface = "bold")))) + scale_x_continuous(expand = expansion(add = c(0.001,
0.05))) + scale_y_continuous(expand = expansion(add = c(0.001,
0.05))) + facet_wrap(~subgroup2, ncol = 1, scales = "fixed") +
ylab("True Positive Rate\n") + xlab("\nFalse Positive Rate") +
scale_color_manual(values = c("#2dc46b")) + guides(color = guide_legend("Groups",
byrow = T, override.aes = list(size = 5)))
# Print the Enterococcus ROC figure. This must be its own statement: when it
# is fused with the assignment below, the object `fig_5bebac_infx_confusion`
# is created and `ebac_infx_confusion` stays undefined.
fig_5b
# Pull the four Enterobacterales infection-by-expansion counts into one wide
# row
ebac_infx_confusion <- expan_infx_stats %>%
  filter(metric %in% c(
    "ebac_infx_ebac_below_cutpoint", "ebac_infx_ebac_above_cutpoint",
    "ebac_noinfx_ebac_below_cutpoint", "ebac_noinfx_ebac_above_cutpoint"
  )) %>%
  select(metric, count) %>%
  pivot_wider(names_from = metric, values_from = count)
# 2x2 table: rows = infection status, columns = expansion status.
# data.frame() turns the `No Expansion` name into `No.Expansion`.
ebac_infx_confusion_cnfs <- data.frame(
  Enterobacterales = c("Infection", "No Infection"),
  Expansion = c(
    ebac_infx_confusion$ebac_infx_ebac_above_cutpoint,
    ebac_infx_confusion$ebac_noinfx_ebac_above_cutpoint
  ),
  `No Expansion` = c(
    ebac_infx_confusion$ebac_infx_ebac_below_cutpoint,
    ebac_infx_confusion$ebac_noinfx_ebac_below_cutpoint
  )
)
# Summary statistics from the table. Cells are addressed as [row, column]:
# row 1 = Infection, row 2 = No Infection; column 2 = Expansion,
# column 3 = No.Expansion.
ebac_infx_confusion_cnfs <- ebac_infx_confusion_cnfs %>%
  mutate(
    sensitivity = round(
      ebac_infx_confusion_cnfs[1, 2] /
        (ebac_infx_confusion_cnfs[1, 2] + ebac_infx_confusion_cnfs[1, 3]),
      3
    ),
    specificity = round(
      ebac_infx_confusion_cnfs[2, 3] /
        (ebac_infx_confusion_cnfs[2, 3] + ebac_infx_confusion_cnfs[2, 2]),
      3
    ),
    odds_ratio = round(
      (ebac_infx_confusion_cnfs[1, 2] / ebac_infx_confusion_cnfs[1, 3]) /
        (ebac_infx_confusion_cnfs[2, 2] / ebac_infx_confusion_cnfs[2, 3]),
      3
    )
  )
# fig_5d: ROC curve (fpr vs tpr) for the Enterobacterales relative-abundance
# model, annotated with text labels for AUC, accuracy, specificity,
# sensitivity, the optimal cutpoint and the odds ratio, plus an inset table of
# the infection-by-expansion counts taken from `ebac_infx_confusion_cnfs`.
fig_5d <- test_abundance_unnest %>%
separate(subgroup, into = c("Input", "Predict"), sep = "\n",
remove = F) %>%
# keep models whose input and predicted outcome name the same organism group
filter(grepl("enterobacterales", Input) & grepl("enterobacterales",
Predict) | grepl("enterococcus", Input) & grepl("enterococcus",
Predict)) %>%
group_by(pos_class) %>%
ungroup() %>%
unnest(roc_curve) %>%
arrange(desc(AUC)) %>%
select(-boot) %>%
# Build the annotation labels. Each label is the point estimate followed by
# two bracketed values read from rows 1 and 2 of the third column of
# boot_ci(..., alpha = 0.05) - presumably the lower and upper bounds; confirm.
# The fitted objects are looked up by position: test_abundance[4][[1]] for the
# Enterococcus model and test_abundance[1][[1]] for the Enterobacterales model.
mutate(auc_label = paste0("AUC = ", formatC(round(AUC, 3),
digits = 2, format = "f")), auc_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(auc_label, " [", formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
AUC, alpha = 0.05)[3][1, ], 2)), digits = 2, format = "f"),
", ", formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
AUC, alpha = 0.05)[3][2, ], 2)), digits = 2, format = "f"),
"]"), grepl("enterobacterales_rel_abundance", Input) ~
paste0(auc_label, " [", formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
AUC, alpha = 0.05)[3][1, ], 2)), digits = 2, format = "f"),
", ", formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
AUC, alpha = 0.05)[3][2, ], 2)), digits = 2,
format = "f"), "]")), acc_label = paste0("Accuracy = ",
formatC(round(acc, 3) * 100, digits = 0, format = "f"),
"%"), acc_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(acc_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
acc, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0, format = "f"),
"%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
acc, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0, format = "f"),
"%"), "]"), grepl("enterobacterales_rel_abundance", Input) ~
paste0(acc_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
acc, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
acc, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]")), sens_label = paste0("Sensitivity = ",
formatC(round(sensitivity, 3) * 100, digits = 0, format = "f"),
"%"), sens_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(sens_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
sensitivity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
sensitivity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]"), grepl("enterobacterales_rel_abundance",
Input) ~ paste0(sens_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
sensitivity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
sensitivity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]")), spec_label = paste0("Specificity = ",
formatC(round(specificity, 3) * 100, digits = 0, format = "f"),
"%"), spec_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(spec_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
specificity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
specificity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]"), grepl("enterobacterales_rel_abundance",
Input) ~ paste0(spec_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
specificity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
specificity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]"))) %>%
# restrict to models whose input is a relative abundance
filter(grepl(pattern = "rel_abundance", x = Input)) %>%
# tidy the facet label and attach the odds ratio from the confusion table
mutate(subgroup = str_to_title(subgroup), subgroup = gsub(pattern = "_",
replacement = " ", x = subgroup), subgroup = gsub(pattern = "rel abundance",
replacement = "Relative Abundance (%)", x = subgroup),
subgroup2 = ifelse(grepl(x = subgroup, pattern = "enterococcus",
ignore.case = TRUE), "Enterococcus", "Enterobacterales"),
subgroup2 = factor(subgroup2, levels = c("Enterococcus",
"Enterobacterales")), odds_label = paste0("OR = ",
ebac_infx_confusion_cnfs$odds_ratio[1])) %>%
# this figure shows the Enterobacterales model only
filter(subgroup2 == "Enterobacterales") %>%
ggplot(aes(x = fpr, y = tpr, color = subgroup2)) + geom_line(size = 1.2) +
geom_text(aes(label = auc_label, x = 0.45, y = 0.16), show.legend = F,
hjust = 0, size = 6) + geom_text(aes(label = acc_label,
x = 0.45, y = 0.12), show.legend = F, hjust = 0, size = 6) +
geom_text(aes(label = spec_label, x = 0.45, y = 0.08), show.legend = F,
hjust = 0, size = 6) + geom_text(aes(label = sens_label,
x = 0.45, y = 0.04), show.legend = F, hjust = 0, size = 6) +
geom_text(aes(label = paste0("Cutpoint = ", round((optimal_cutpoint),
digits = 3) * 100, "%"), x = 0.45, y = 0.2), hjust = 0,
size = 6, show.legend = F) + geom_text(aes(label = odds_label,
x = 0.45, y = 0), show.legend = F, hjust = 0, size = 6) +
theme_bw() + theme(axis.text = et(color = "black", size = 12),
axis.title = et(color = "black", size = 14), legend.text = et(size = 12),
legend.title = et(size = 14), legend.spacing.y = unit(0.5,
"cm"), legend.position = "none", panel.grid = eb(), strip.text = et(size = 18,
color = "red", face = "bold"), strip.background = eb(),
panel.border = eb(), panel.spacing.y = unit(20, "mm"), axis.line.x = el(color = "black")) +
geom_vline(xintercept = -0.05) + geom_hline(yintercept = -0.05) +
# inset table: Expansion / No Expansion counts per infection status plus a
# row total
geom_table_npc(data = ebac_infx_confusion_cnfs %>%
column_to_rownames(var = "Enterobacterales") %>%
select(Expansion, `No Expansion` = No.Expansion) %>%
mutate(Total = rowSums(.)), label = list(ebac_infx_confusion_cnfs %>%
column_to_rownames(var = "Enterobacterales") %>%
select(Expansion, `No Expansion` = No.Expansion) %>%
mutate(Total = rowSums(.))), npcx = 0.15, npcy = 0.5,
hjust = 0, vjust = 1, table.rownames = TRUE, table.theme = ttheme_minimal(base_size = 16,
core = list(bg_params = list(col = "red"), fg_params = list(col = "red")),
colhead = list(fg_params = list(col = "red", fontface = "bold")),
rowhead = list(fg_params = list(col = "red", fontface = "bold")))) +
scale_x_continuous(expand = expansion(add = c(0.001, 0.05))) +
scale_y_continuous(expand = expansion(add = c(0.001, 0.05))) +
facet_wrap(~subgroup2, ncol = 1, scales = "fixed") + ylab("True Positive Rate\n") +
xlab("\nFalse Positive Rate") + scale_color_manual(values = c("red")) +
guides(color = guide_legend("Groups", byrow = T, override.aes = list(size = 5)))
fig_5d
# Heatmap compounds and their categories
heatmap_lookup <- read.csv(
  "./Data/qual_heatmap_lookup.csv",
  stringsAsFactors = FALSE
)
# Build heatmap compound list: title-cased compound names (with the PreQ1
# spelling restored) that also appear in the lookup table, one row each
heatmap_cmpds <- metab_qual_anon %>%
  mutate(
    compound = str_to_title(compound),
    compound = recode(compound, Preq1 = "PreQ1")
  ) %>%
  filter(compound %in% heatmap_lookup$compound) %>%
  distinct(compound) %>%
  drop_na()
# Per-compound log2 ratio of mean `mvalue`: samples below the Enterococcus
# cutpoint (0) over samples at or above it (1).
qual_log2fc_ecoc_expan <- metab_qual_anon %>%
  mutate(
    compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
    compound = str_to_title(compound),
    compound = recode(compound, Preq1 = "PreQ1")
  ) %>%
  filter(compound %in% heatmap_cmpds$compound) %>%
  left_join(
    peri_matrix_all %>%
      select(sampleID, enterococcus_rel_abundance)
  ) %>%
  drop_na() %>%
  mutate(
    enterococcus_expansion = ifelse(
      enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2],
      1, 0
    )
  ) %>%
  arrange(enterococcus_expansion, sampleID) %>%
  group_by(compound) %>%
  # per-compound group sizes
  mutate(
    enterococcus_expansion_0 = length(mvalue[enterococcus_expansion == "0"]),
    enterococcus_expansion_1 = length(mvalue[enterococcus_expansion == "1"])
  ) %>%
  # drop compounds whose values are all zero
  filter(any(mvalue != 0)) %>%
  summarise(
    log2fc_val = log(
      (mean(mvalue[enterococcus_expansion == "0"], na.rm = TRUE) /
        mean(mvalue[enterococcus_expansion == "1"], na.rm = TRUE)),
      base = 2
    )
  ) # 0 = No Expansion, 1 = Expansion
# Per-compound Wilcoxon test of `mvalue` between expansion groups, with
# Benjamini-Hochberg adjusted p-values and significance labels.
qual_pval_ecoc_expan <- metab_qual_anon %>%
  mutate(
    compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
    compound = str_to_title(compound),
    compound = recode(compound, Preq1 = "PreQ1")
  ) %>%
  filter(compound %in% heatmap_cmpds$compound) %>%
  left_join(
    peri_matrix_all %>%
      select(sampleID, enterococcus_rel_abundance)
  ) %>%
  drop_na() %>%
  mutate(
    enterococcus_expansion = ifelse(
      enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2],
      1, 0
    )
  ) %>%
  arrange(enterococcus_expansion, sampleID) %>%
  group_by(compound) %>%
  # per-compound group sizes
  mutate(
    enterococcus_expansion_0 = length(mvalue[enterococcus_expansion == "0"]),
    enterococcus_expansion_1 = length(mvalue[enterococcus_expansion == "1"])
  ) %>%
  # drop compounds whose values are all zero
  filter(any(mvalue != 0)) %>%
  rstatix::wilcox_test(mvalue ~ enterococcus_expansion) %>%
  rstatix::adjust_pvalue(method = "BH") %>%
  rstatix::add_significance("p.adj")
# Fold changes and test results side by side, with compound names as row names
qual_tot_ecoc_expan <- qual_log2fc_ecoc_expan %>%
  left_join(qual_pval_ecoc_expan) %>%
  column_to_rownames(var = "compound")
# volcano label color
ecoc_expan_volcano_labcol <- qual_tot_ecoc_expan %>%
  filter(p.adj <= 0.05 & abs(log2fc_val) >= 0.75) %>%
  mutate(
    color = ifelse(log2fc_val < 0, "#389458", "gray47")
  )
# # Volcano Plot (adjusted) set.seed(456)
# volcano_adj_ecoc_expan <-
# EnhancedVolcano(qual_tot_ecoc_expan, lab =
# rownames(qual_tot_ecoc_expan), x = 'log2fc_val', y =
# 'p.adj', title = NULL, pCutoff = 0.05, FCcutoff = 0.75,
# pointSize = 6, labSize = 8, axisLabSize = 32, labCol =
# ecoc_expan_volcano_labcol$color, caption = NULL, colAlpha
# = 0.65, col = c('gray75', c('#D4CA15', '#912777',
# '#1238E3')), xlim = c(-2.5, 4), ylim = c(0, 8),
# legendPosition = 'none', legendLabels =
# c(expression(p.adj > 0.05*';' ~ Log[2] ~ FC <
# '\u00B1'*0.75), expression(p.adj > 0.05*';' ~ Log[2] ~ FC
# >= '\u00B1'*0.75), expression(p.adj <= 0.05*';' ~ Log[2]
# ~ FC < '\u00B1'*0.75), expression(p.adj <= 0.05*';' ~
# Log[2] ~ FC >= '\u00B1'*0.75)), legendLabSize = 14,
# boxedLabels = T, drawConnectors = T, widthConnectors =
# 0.2, arrowheads = F, gridlines.minor = F, gridlines.major
# = F, max.overlaps = Inf ) + theme( axis.text = et(color =
# 'black'), legend.text = et(hjust = 0), plot.margin =
# unit(c(0, 0, 0, 0), 'cm') ) + labs(subtitle = NULL) +
# annotate('segment', x = 0.8, xend = 3, y = 7.95, yend =
# 7.95, arrow = arrow(), size = 2, color = 'gray67') +
# annotate('text', x = 1.65, y = 8.15, label = 'No
# Expansion', size = 9, color = 'gray67') +
# annotate('rect', xmin = 0.75, xmax = Inf, ymin =
# -log(0.05, base = 10), ymax = Inf, alpha = .1, fill =
# 'gray87') + annotate('segment', x = -0.8, xend = -2.5, y
# = 7.95, yend = 7.95, arrow = arrow(), size = 2, color =
# '#389458') + annotate('text', x = -1.55, y = 8.15, label
# = 'Expansion', size = 9, color = '#389458') +
# annotate('rect', xmin = -0.75, xmax = -Inf, ymin =
# -log(0.05, base = 10), ymax = Inf, alpha = .1, fill =
# '#389458') + guides(color = guide_legend(nrow = 4), shape
# = guide_legend(nrow = 4)) + scale_y_continuous(expand =
# expansion(add = c(0, 0.15))) volcano_adj_ecoc_expan
# ggsave(plot = volcano_adj_ecoc_expan, filename =
# './Results/Figure_5A.pdf', width = 24, height = 11)
# Using unadjusted p-values for down-selection of
# metabolites, show distribution of normalized peak area
# Sample-level values for the boxplots, limited by the right_join to compounds
# with p <= 0.05 and |log2 fold change| >= 1 in `qual_tot_ecoc_expan`.
boxplot_ecoc_expan <- metab_qual_anon %>%
  mutate(
    compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
    compound = str_to_title(compound),
    compound = recode(compound, Preq1 = "PreQ1")
  ) %>%
  filter(compound %in% heatmap_cmpds$compound) %>%
  left_join(
    peri_matrix_all %>%
      select(sampleID, enterococcus_rel_abundance)
  ) %>%
  drop_na() %>%
  mutate(
    enterococcus_expansion = ifelse(
      enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2],
      1, 0
    ),
    enterococcus_expansion = ifelse(
      enterococcus_expansion == 1, "Expansion", "No Expansion"
    )
  ) %>%
  arrange(enterococcus_expansion, sampleID) %>%
  group_by(compound) %>%
  # per-compound group sizes
  mutate(
    enterococcus_expansion_0 = length(
      mvalue[enterococcus_expansion == "No Expansion"]
    ),
    enterococcus_expansion_1 = length(
      mvalue[enterococcus_expansion == "Expansion"]
    )
  ) %>%
  # drop compounds whose values are all zero
  filter(any(mvalue != 0)) %>%
  right_join(
    qual_tot_ecoc_expan %>%
      rownames_to_column(var = "compound") %>%
      mutate(abs_log2fc_val = abs(log2fc_val)) %>%
      filter(p <= 0.05, abs_log2fc_val >= 1)
  )
# Draw and save the faceted boxplots when any compound passed the filter;
# otherwise just report that nothing qualified.
if (nrow(boxplot_ecoc_expan) == 0) {
  print("no significant observations")
} else {
  print(
    ggpar(
      ggboxplot(
        boxplot_ecoc_expan,
        x = "enterococcus_expansion",
        y = "mvalue",
        color = "enterococcus_expansion",
        palette = c("#389458", "gray87"),
        ylab = "Normalized Peak Area",
        xlab = "",
        outlier.shape = NA
      ),
      legend.title = "Enterococcus"
    ) +
      stat_compare_means(label.y.npc = 0.75) +
      facet_wrap(~compound, scales = "free_y") +
      # individual samples, jittered over the boxes
      geom_point(
        data = boxplot_ecoc_expan,
        aes(
          x = enterococcus_expansion, y = mvalue,
          color = enterococcus_expansion
        ),
        position = position_jitter(width = 0.2),
        size = 2,
        alpha = 0.65
      )
  )
  # no `plot` argument is given, so ggsave() writes the last plot displayed
  ggsave(
    filename = "./Results/enterococcus_expansion_boxplot.pdf",
    width = 24, height = 18
  )
}
# Custom colors
# Two-colour palette, and a four-slot variant (first colour three times, then
# the second)
pirate_colors <- c("#1A49BE", "#3A001E")
pirate_colors2 <- c(rep("#3A001E", times = 3), "#1A49BE")
# Per-sample taxon abundances for first patient samples and "hd" samples,
# re-normalised so each sample's `pctseqs` sums to 1.
t_metaphlan <- metaphlan_df %>%
  filter(
    sampleID %in% first_samps$sampleID | grepl("hd", sampleID)
  ) %>%
  mutate(
    db = ifelse(grepl("lt", sampleID), "Liver Transplant", "Healthy Donor")
  ) %>%
  select(sampleID, taxid, db, pctseqs, Total) %>%
  # keep one row per (sample, taxon, abundance) combination
  group_by(sampleID, taxid, pctseqs) %>%
  slice(1) %>%
  ungroup() %>%
  filter(pctseqs >= 1e-04) %>%
  group_by(sampleID) %>%
  dplyr::add_count(taxid, name = "totalSp") %>%
  # NOTE(review): the data are grouped by sampleID here, so `sampleID_count`
  # is always 1 and `spPres` equals `totalSp` (>= 1); the `spPres >= 0.1`
  # filter below therefore appears to keep every row. If a cross-sample
  # prevalence filter was intended this needs an ungroup() first - confirm
  # with the author before changing, as it would alter downstream results.
  mutate(
    sampleID_count = length(unique(sampleID)),
    spPres = totalSp / sampleID_count
  ) %>%
  filter(spPres >= 0.1) %>%
  select(-c(Total, sampleID_count, spPres, totalSp)) %>%
  group_by(sampleID) %>%
  mutate(pctseqs = pctseqs / sum(pctseqs))
# Wide sample-by-taxon table: sampleID as row names, one column per taxid,
# missing combinations filled with 0
t_metaphlan_mat <- t_metaphlan %>%
  distinct() %>%
  pivot_wider(
    names_from = "taxid",
    values_from = "pctseqs",
    values_fill = 0
  ) %>%
  column_to_rownames(var = "sampleID") %>%
  select(-db)
# taxUMAP microbiota table
# Same matrix with the sample ID in `index_column` and every taxon column
# renamed to "taxID<taxid>", written to CSV for the Python step.
taxumap_microbiota <- t_metaphlan_mat %>%
  rownames_to_column(var = "index_column") %>%
  pivot_longer(
    -index_column,
    names_to = "taxid",
    values_to = "pctseq"
  ) %>%
  mutate(taxid = paste0("taxID", taxid)) %>%
  pivot_wider(names_from = "taxid", values_from = "pctseq")
write.csv(
  taxumap_microbiota,
  "./Results/microbiota_table.csv",
  row.names = FALSE
)
# taxUMAP taxonomy table
# One row per taxon ("taxID<taxid>" in `OTU`) with its lineage; empty ranks
# become "unclassified" and spaces in Genus/Species become underscores.
taxumap_taxonomy <- t_metaphlan_mat %>%
  rownames_to_column(var = "index_column") %>%
  pivot_longer(
    -index_column,
    names_to = "taxid",
    values_to = "pctseq"
  ) %>%
  left_join(
    tax_lookup %>%
      mutate(taxid = as.character(taxid))
  ) %>%
  mutate(taxid = paste0("taxID", taxid)) %>%
  distinct(Kingdom, Phylum, Class, Order, Family, Genus, Species, taxid) %>%
  transmute(
    OTU = taxid,
    Kingdom,
    Phylum = if_else(Phylum == "", "unclassified", Phylum),
    Class = if_else(Class == "", "unclassified", Class),
    Order = if_else(Order == "", "unclassified", Order),
    Family = if_else(Family == "", "unclassified", Family),
    Genus = if_else(Genus == "", "unclassified", Genus),
    Genus = gsub(" ", "_", Genus),
    Species = if_else(Species == "", "unclassified", Species),
    Species = gsub(" ", "_", Species)
  )
write.csv(taxumap_taxonomy, "./Results/taxonomy.csv", row.names = FALSE)
## ['', '/Users/nick/miniconda3/bin', '/Library/Frameworks/R.framework/Versions/4.2/Resources/library/reticulate/config', '/Users/nick/miniconda3/lib/python310.zip', '/Users/nick/miniconda3/lib/python3.10', '/Users/nick/miniconda3/lib/python3.10/lib-dynload', '/Users/nick/.local/lib/python3.10/site-packages', '/Users/nick/miniconda3/lib/python3.10/site-packages', '/Users/nick/Downloads/taxumap', '/Library/Frameworks/R.framework/Versions/4.2/Resources/library/reticulate/python']
# The taxUMAP step is written in Python. Bare Python statements cannot be
# parsed by R, so they are executed through reticulate instead; objects created
# in Python's main module are then reachable from R as `py$<name>`.
# NOTE(review): the plotting code further down reads `py$embedding_df`, which
# is not created by the Python statements visible here -- confirm where the
# embedding is computed and assigned.
reticulate::py_run_string("from taxumap.taxumap_base import Taxumap")
# From file
reticulate::py_run_string(
  "tu = Taxumap(taxonomy='./Results/taxonomy.csv', microbiota_data='./Results/microbiota_table.csv',random_state=456)"
)
# Captured console output from the original notebook run:
## Phylum Class
## not monophyletic
## Class Order
## not monophyletic
## Order Family
## not monophyletic
## Family Genus
## not monophyletic
## post validate inputs main Kingdom ... Species
## ASV ...
## taxID817 Bacteria ... Bacteroides_fragilis
## taxID818 Bacteria ... Bacteroides_thetaiotaomicron
## taxID820 Bacteria ... Bacteroides_uniformis
## taxID821 Bacteria ... Phocaeicola_vulgatus
## taxID823 Bacteria ... Parabacteroides_distasonis
## ... ... ... ...
## taxID28447 Bacteria ... Clavibacter_michiganensis
## taxID33968 Bacteria ... Leuconostoc_pseudomesenteroides
## taxID709323 Bacteria ... Fructobacillus_tropaeoli
## taxID1070421 Bacteria ... Periweissella_fabalis
## taxID2749962 Bacteria ... Lactococcus_paracarnosus
##
## [672 rows x 7 columns]
## Taxumap(agg_levels = ['Phylum', 'Family'], weights = [1, 1])
#### Alpha Diversity ####
# Each metric is computed from t_metaphlan_mat (samples in rows) and reshaped
# into a two-column data frame: sampleID plus the metric. The value column is
# named "." because the vector is piped into as.data.frame(); it is renamed in
# the last step, so the pipe into as.data.frame() must stay.

# Alpha diversity matrix: Inverse Simpson
alpha_invsim <- t_metaphlan_mat %>%
  vegan::diversity(index = "invsimpson") %>%
  as.data.frame() %>%
  rownames_to_column(var = "sampleID") %>%
  dplyr::rename(InvSimpson = ".")

# Alpha Diversity matrix: Shannon
alpha_shannon <- t_metaphlan_mat %>%
  vegan::diversity(index = "shannon") %>%
  as.data.frame() %>%
  rownames_to_column(var = "sampleID") %>%
  dplyr::rename(Shannon = ".")

# Alpha Diversity matrix: richness (number of taxa with non-zero abundance)
alpha_richness <- t_metaphlan_mat %>%
  vegan::specnumber() %>%
  as.data.frame() %>%
  rownames_to_column(var = "sampleID") %>%
  dplyr::rename(Richness = ".")
#### taxUMAP #####
# Most abundant taxon per sample (largest pctseqs), annotated with its lineage.
top_tax <- t_metaphlan %>%
  group_by(sampleID) %>%
  slice_max(pctseqs, n = 1) %>%
  left_join(tax_lookup) %>%
  # Every non-grouping column is cast to character; empty strings become NA so
  # that replace_na() can label the missing ranks.
  mutate(across(everything(), ~ifelse(.=="", NA, as.character(.)))) %>%
  replace_na(
    list(
      Phylum = "unclassified",
      Class = "unclassified",
      Order = "unclassified",
      Family = "unclassified",
      Genus = "unclassified",
      Species = "unclassified"
    )
  ) %>%
  mutate(
    # pctseqs was turned into character above; restore it to numeric.
    pctseqs = as.numeric(pctseqs),
    # An unclassified genus is labelled with its family on a separate line.
    Genus = ifelse(
      Genus == "unclassified",
      paste(Family, Genus, sep = "\n"),
      as.character(Genus)
    )
  )
# Long-format abundance table joined to the taxonomy and to Shannon diversity.
# `Genus` is overwritten with a "Phylum-Order-Family-Genus" label, and y.text
# is the midpoint of each row's cumulative-abundance interval within its
# sample (rows ordered by that label).
metaphlan_df2 <- t_metaphlan %>%
  mutate(
    db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))
  ) %>%
  left_join(tax_lookup) %>%
  drop_na(taxid) %>%
  arrange(Kingdom, Phylum, Class, Order, Family, Genus) %>%
  mutate(
    Genus = paste0(Phylum, "-", Order, "-", Family, "-", Genus)
  ) %>%
  left_join(alpha_shannon) %>%
  group_by(sampleID) %>%
  arrange(Genus) %>%
  mutate(
    cum.pct = cumsum(pctseqs),
    # Average of the running total and the previous running total.
    y.text = (cum.pct + c(0, cum.pct[-length(cum.pct)])) / 2
  ) %>%
  ungroup() %>%
  dplyr::select(-cum.pct)
# One row per sample (the first row of each sample in metaphlan_df2) with its
# diversity group:
#   - "Healthy Donor" for the healthy-donor cohort;
#   - "High Diversity" when Shannon >= the lowest Shannon among healthy donors;
#   - the remaining samples are split at their own median Shannon into
#     "Medium Diversity" (>= median) and "Low Diversity".
# diversity_group_abv is the same grouping with " Diversity" stripped.
metaphlan_df_sumry <- metaphlan_df2 %>%
  group_by(sampleID) %>%
  dplyr::slice(1) %>%
  ungroup() %>%
  mutate(
    healthy_min_shannon = min(Shannon[db == "Healthy Donor"]),
    # First pass: anything not yet classified is parked as "TBD".
    diversity_group = case_when(
      db == "Healthy Donor" ~ "Healthy Donor",
      Shannon >= healthy_min_shannon ~ "High Diversity",
      TRUE ~ "TBD"
    ),
    lt_med_shannon = median(
      Shannon[diversity_group == "TBD"],
      na.rm = TRUE
    ),
    # Second pass: resolve "TBD" into Medium / Low around that median.
    diversity_group = case_when(
      diversity_group %in% c("Healthy Donor", "High Diversity") ~ diversity_group,
      Shannon >= lt_med_shannon ~ "Medium Diversity",
      TRUE ~ "Low Diversity"
    ),
    diversity_group = factor(
      diversity_group,
      levels = c(
        "Low Diversity",
        "Medium Diversity",
        "High Diversity",
        "Healthy Donor"
      )
    ),
    diversity_group_abv = gsub(
      pattern = " Diversity",
      replacement = "",
      x = diversity_group
    ),
    diversity_group_abv = factor(
      diversity_group_abv,
      levels = c("Low", "Medium", "High", "Healthy Donor")
    )
  ) %>%
  arrange(Genus)
# Figure 1B: scatter plot of the taxUMAP embedding, one point per sample.
# Points are coloured by the sample's most abundant taxon (top_tax$Genus),
# shaped and faded by cohort (db), and sized by that taxon's relative
# abundance in percent. Colour, fill and alpha legends are suppressed.
tax_umap_mat_plot <- py$embedding_df %>%
  as.data.frame() %>%
  rownames_to_column(var = "sampleID") %>%
  left_join(t_metaphlan %>%
              distinct(sampleID, db)) %>%
  left_join(top_tax) %>%
  ggplot(aes(x = taxumap1, y = taxumap2, color = Genus, shape = db,
             size = pctseqs*100, alpha = db)) +
  geom_point(fill = "black") +
  theme_bw() +
  theme(panel.grid = eb(),
        axis.title = et(color = "black", size = 14),
        axis.text = et(color = "black", size = 12),
        axis.line = el(color = "black"),
        # legend.title = et(color = "black", size = 14),
        # legend.text = et(color = "black", size = 12),
        legend.position = "right"
  ) +
  xlab("taxUMAP1") +
  ylab("taxUMAP2") +
  guides(
    shape = guide_legend(
      title = "Cohort",
      override.aes = list(size = 4),
      order = 1,
      ncol = 1,
      title.position = "top"
    ),
    size = guide_legend(
      title = "Relative Abundance (%)",
      order = 2,
      ncol = 1,
      title.position = "top"
    ),
    color = "none",
    fill = "none",
    alpha = "none"
  ) +
  scale_color_manual(values = metaphlan_pal) +
  scale_shape_manual(values = c(24, 16)) +
  scale_alpha_manual(values = c(1, 0.45))
# Display the plot, then save it. These are two separate statements; fused
# together they formed a call to the undefined `tax_umap_mat_plotggsave()`.
tax_umap_mat_plot
ggsave(plot = tax_umap_mat_plot,
       filename = "./Results/Figure_1B.pdf",
       height = 8, width = 8)
#### Alpha Diversity Stats #####
# Pairs of diversity groups (abbreviated labels) that are compared against
# each other in the tests and on the plots.
diversity_comps <- list(
  c("Healthy Donor", "High"),
  c("High", "Medium"),
  c("High", "Low"),
  c("Medium", "Low")
)
# Two-sided Wilcoxon tests of each alpha-diversity metric (InvSimpson, Shannon,
# Richness) between the group pairs above, BH-adjusted by wilcox_test().
alpha_stats <- alpha_invsim %>%
  left_join(alpha_shannon) %>%
  left_join(alpha_richness) %>%
  # Attach each sample's cohort and diversity group (one row per sample).
  inner_join(
    metaphlan_df2 %>%
      left_join(select(metaphlan_df_sumry, sampleID, db, diversity_group_abv)) %>%
      distinct(sampleID, db, diversity_group_abv)
  ) %>%
  pivot_longer(
    !c(sampleID, db, diversity_group_abv),
    names_to = "diversity_metric",
    values_to = "value"
  ) %>%
  group_by(diversity_metric) %>%
  rstatix::wilcox_test(
    value ~ diversity_group_abv,
    comparisons = diversity_comps,
    p.adjust.method = "BH",
    alternative = "two.sided"
  ) %>%
  ungroup()
# Create dataframe for all phylogenetic levels of interest.
# For every sample, pctseqs is summed over all taxa whose full lineage string
# mentions one of the five organisms of interest. The wide/long round trip at
# the end gives every sample a row for every organism, with 0 where none of
# its taxa matched.
phylo_rel_abd <- t_metaphlan %>%
  left_join(tax_lookup) %>%
  inner_join(
    metaphlan_df2 %>%
      left_join(select(metaphlan_df_sumry, sampleID, db, diversity_group_abv)) %>%
      distinct(sampleID, db, diversity_group_abv)
  ) %>%
  # Species temporarily holds the whole lineage so one regex can hit any rank.
  mutate(
    Species = paste(Kingdom, Phylum, Class, Order, Family, Genus, Species, sep = "|")
  ) %>%
  filter(
    grepl(
      pattern = "Enterococcus|Enterobacterales|Bacteroidetes|Lachnospiraceae|Oscillospiraceae",
      x = Species
    )
  ) %>%
  mutate(
    organism = case_when(
      grepl(pattern = "Enterococcus", x = Species) ~ "Enterococcus",
      grepl(pattern = "Enterobacterales", x = Species) ~ "Enterobacterales",
      grepl(pattern = "Bacteroidetes", x = Species) ~ "Bacteroidetes",
      grepl(pattern = "Lachnospiraceae", x = Species) ~ "Lachnospiraceae",
      grepl(pattern = "Oscillospiraceae", x = Species) ~ "Oscillospiraceae"
    )
  ) %>%
  select(sampleID, db, diversity_group_abv, organism, pctseqs) %>%
  group_by(sampleID, db, diversity_group_abv, organism) %>%
  summarise(pctseqs = sum(pctseqs)) %>%
  ungroup() %>%
  pivot_wider(
    names_from = organism,
    values_from = pctseqs,
    values_fill = 0
  ) %>%
  pivot_longer(
    !c(sampleID, db, diversity_group_abv),
    names_to = "organism",
    values_to = "pctseqs"
  )
# Obtain stats for all phylogenetic levels of interest.
# Per-organism Wilcoxon tests are stacked with the alpha-diversity tests, the
# combined p-values are BH-adjusted, and p.adj is rounded to 3 decimals with a
# floor of 0.001 before the table is exported.
rel_abd_alpha_stats <- phylo_rel_abd %>%
  group_by(organism) %>%
  rstatix::wilcox_test(pctseqs ~ diversity_group_abv) %>%
  bind_rows(alpha_stats) %>%
  rstatix::adjust_pvalue(method = "BH") %>%
  mutate(
    p.adj = ifelse(p.adj < 0.001, 0.001, round(p.adj, 3))
  )
write.csv(
  rel_abd_alpha_stats,
  file = "./Results/Figure_1_Statistics.csv",
  row.names = FALSE
)
# p-value cutpoints and the significance symbols drawn on the plots.
symnum.args <- list(
  cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf),
  symbols = c("****", "***", "**", "*", "ns")
)
# One colour per diversity group.
diversity_group_colors <- c("#3A001E", "#8A0246", "#C20463", "#1A49BE")
# #### Plot Inverse Simpson ####
#
#
# set.seed(456)
# gg_alpha_invsim <- alpha_invsim %>%
# inner_join(t_metaphlan %>%
# distinct(sampleID, db) %>%
# select(sampleID, db)
# ) %>%
# inner_join(metaphlan_df %>%
# left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group, diversity_group_abv)) %>%
# distinct(sampleID, db, diversity_group, diversity_group_abv)
# ) %>%
# mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
# ggplot(., aes(x = diversity_group_abv,
# y = InvSimpson,
# color = diversity_group_abv,
# fill = db)) +
# geom_boxplot(outlier.colour = NA,
# alpha = 0.35) +
# geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
# stat_compare_means(comparisons = diversity_comps,
# tip.length = 0.01,
# step.increase = 0.075,
# symnum.args = symnum.args,
# method.args = list(alternative = "two.sided",
# exact = FALSE)
# ) +
# theme_bw() +
# theme(
# panel.grid = eb(),
# axis.title.y = et(size = 14, color = "black"),
# axis.title.x = eb(),
# axis.text = et(size = 12, color = "black"),
# plot.margin = margin(t = 5, # Top margin
# r = 5, # Right margin
# b = 5, # Bottom margin
# l = 5), # Left margin
# panel.border = eb(),
# axis.line = el(color = 'black')
# ) +
# ylab("Alpha Diversity\n(Inverse Simpson)") +
# scale_fill_manual(values = rev(pirate_colors)) +
# scale_color_manual(values = diversity_group_colors) +
# guides(fill = guide_legend("Cohort"),
# color = guide_legend("Diversity Group",
# override.aes = aes(label = "")))+
# scale_y_continuous(breaks = seq(0,45,5),
# expand = expansion(mult = c(0.01, 0.1))) +
# coord_cartesian(xlim = c(1.1,3.9))
#
#
# gg_alpha_invsim
#
# pdf(file = "./Results/Figure_1C.pdf", height = 6, width = 7)
# gg_alpha_invsim
# invisible(dev.off())
#
# #### Plot Shannon Diversity ####
# set.seed(456)
# gg_alpha_shannon <- alpha_shannon %>%
# inner_join(t_metaphlan %>%
# distinct(sampleID, db) %>%
# select(sampleID, db)
# ) %>%
# inner_join(metaphlan_df %>%
# left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group, diversity_group_abv)) %>%
# distinct(sampleID, db, diversity_group, diversity_group_abv)
# ) %>%
# mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
# ggplot(., aes(x = diversity_group_abv,
# y = Shannon,
# color = diversity_group_abv,
# fill = db)) +
# geom_boxplot(outlier.colour = NA,
# alpha = 0.35) +
# geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
# stat_compare_means(comparisons = diversity_comps,
# tip.length = 0.01,
# step.increase = 0.075,
# symnum.args = symnum.args,
# method.args = list(alternative = "two.sided",
# exact = FALSE)
# ) +
# theme_bw() +
# theme(
# panel.grid = eb(),
# axis.title.y = et(size = 14, color = "black"),
# axis.title.x = eb(),
# axis.text = et(size = 12, color = "black"),
# plot.margin = margin(t = 5, # Top margin
# r = 5, # Right margin
# b = 5, # Bottom margin
# l = 5), # Left margin
# panel.border = eb(),
# axis.line = el(color = 'black')
# ) +
# ylab("Alpha Diversity\n(Shannon)") +
# scale_fill_manual(values = rev(pirate_colors)) +
# scale_color_manual(values = diversity_group_colors) +
# guides(fill = guide_legend("Cohort"),
# color = guide_legend("Diversity Group",
# override.aes = aes(label = "")))+
# scale_y_continuous(breaks = seq(0,6.5,1),
# expand = expansion(mult = c(0.01, 0.1))) +
# coord_cartesian(xlim = c(1.1,3.9))
#
#
# gg_alpha_shannon
#
# pdf(file = "./Results/Figure_1D.pdf", height = 6, width = 7)
# gg_alpha_shannon
# invisible(dev.off())
#
# #### Plot Species Richness ####
# set.seed(456)
# gg_alpha_richness <- alpha_richness %>%
# inner_join(t_metaphlan %>%
# distinct(sampleID, db) %>%
# select(sampleID, db)
# ) %>%
# inner_join(metaphlan_df %>%
# left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group, diversity_group_abv)) %>%
# distinct(sampleID, db, diversity_group, diversity_group_abv)
# ) %>%
# mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
# ggplot(., aes(x = diversity_group_abv,
# y = Richness,
# color = diversity_group_abv,
# fill = db)) +
# geom_boxplot(outlier.colour = NA,
# alpha = 0.35) +
# geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
# stat_compare_means(comparisons = diversity_comps,
# tip.length = 0.01,
# label.y = c(145, 160, 150, 110),
# symnum.args = symnum.args,
# method.args = list(alternative = "two.sided",
# exact = FALSE)
# ) +
# theme_bw() +
# theme(
# panel.grid = eb(),
# axis.title.y = et(size = 14, color = "black"),
# axis.title.x = eb(),
# axis.text = et(size = 12, color = "black"),
# plot.margin = margin(t = 5, # Top margin
# r = 5, # Right margin
# b = 5, # Bottom margin
# l = 5), # Left margin
# panel.border = eb(),
# axis.line = el(color = 'black')
# ) +
# ylab("Alpha Diversity\n(Richness)") +
# scale_fill_manual(values = rev(pirate_colors)) +
# scale_color_manual(values = diversity_group_colors) +
# guides(fill = guide_legend("Cohort"),
# color = guide_legend("Diversity Group",
# override.aes = aes(label = "")))+
# scale_y_continuous(breaks = seq(0,180,50),
# expand = expansion(mult = c(0.01, 0.035))) +
# coord_cartesian(xlim = c(1.1,3.9))
#
#
# gg_alpha_richness
#
# pdf(file = "./Results/Figure_1E.pdf", height = 6, width = 7)
# gg_alpha_richness
# invisible(dev.off())
#### Per-organism relative abundance plots ####
# The five plots below are identical except for the organism shown and the
# heights of the significance brackets, so they share one builder.
#
# plot_organism_rel_abd(organism_name, label_y)
#   organism_name  value of phylo_rel_abd$organism to plot; also printed in
#                  italics as the first line of the y-axis title.
#   label_y        y positions of the significance brackets, one per entry of
#                  diversity_comps.
# Returns the ggplot object. Reads phylo_rel_abd, diversity_comps, symnum.args,
# pirate_colors and diversity_group_colors from the global environment.
plot_organism_rel_abd <- function(organism_name, label_y) {
  # Build the same one-sided formula label the plots used before, with the
  # organism name spliced in as a string literal.
  y_title <- eval(bquote(
    ~atop(paste(italic(.(organism_name))), paste("MetaPhlAn4 Relative Abundance"))
  ))
  phylo_rel_abd %>%
    filter(organism == organism_name) %>%
    group_by(organism) %>%
    mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
    ggplot(aes(x = diversity_group_abv,
               y = pctseqs,
               color = diversity_group_abv,
               fill = db)) +
    # Outliers are hidden on the box plot because every point is drawn by
    # geom_jitter() anyway.
    geom_boxplot(outlier.colour = NA,
                 alpha = 0.35) +
    geom_jitter(width = 0.2, size = 2.5, alpha = 0.65) +
    stat_compare_means(comparisons = diversity_comps,
                       tip.length = 0.01,
                       symnum.args = symnum.args,
                       method.args = list(alternative = "two.sided",
                                          exact = FALSE),
                       label.y = label_y
    ) +
    theme_bw() +
    theme(
      panel.grid = eb(),
      axis.title.y = et(size = 14, color = "black"),
      axis.title.x = eb(),
      axis.text = et(size = 12, color = "black"),
      plot.margin = margin(t = 5, # Top margin
                           r = 5, # Right margin
                           b = 5, # Bottom margin
                           l = 5), # Left margin
      panel.border = eb(),
      axis.line = el(color = 'black')
    ) +
    ylab(y_title) +
    scale_fill_manual(values = rev(pirate_colors)) +
    scale_color_manual(values = diversity_group_colors) +
    guides(fill = guide_legend("Cohort"),
           color = guide_legend("Diversity Group",
                                override.aes = aes(label = ""))) +
    scale_y_continuous(breaks = seq(0, 1, 0.1),
                       limits = c(-0.01, 1.1),
                       expand = expansion(mult = c(0.01, 0.1)),
                       labels = scales::percent_format(accuracy = 1)) +
    coord_cartesian(xlim = c(1.1, 3.9))
}
#### Plot Lachnospiraceae ####
set.seed(456)
gg_lach_rel_abd <- plot_organism_rel_abd(
  "Lachnospiraceae",
  label_y = c(0.65, 0.71, 0.76, 0.82)
)
# gg_lach_rel_abd
# pdf(file = "./Results/Figure_1F.pdf", height = 6, width = 7)
# gg_lach_rel_abd
# invisible(dev.off())
#### Plot Bacteroidetes ####
set.seed(456)
gg_bact_rel_abd <- plot_organism_rel_abd(
  "Bacteroidetes",
  label_y = c(0.65, 0.98, 0.92, 1.02)
)
# gg_bact_rel_abd
# pdf(file = "./Results/Figure_1I.pdf", height = 6, width = 7)
# gg_bact_rel_abd
# invisible(dev.off())
#### Plot Oscillospiraceae ####
set.seed(456)
gg_oscl_rel_abd <- plot_organism_rel_abd(
  "Oscillospiraceae",
  label_y = c(0.33, 0.52, 0.59, 0.66)
)
# gg_oscl_rel_abd
# pdf(file = "./Results/Figure_1J.pdf", height = 6, width = 7)
# gg_oscl_rel_abd
# invisible(dev.off())
#### Plot Enterococcus ####
set.seed(456)
gg_ecoc_rel_abd <- plot_organism_rel_abd(
  "Enterococcus",
  label_y = c(0.20, 0.75, 0.985, 1.05)
)
# gg_ecoc_rel_abd
# pdf(file = "./Results/Figure_1G.pdf", height = 6, width = 7)
# gg_ecoc_rel_abd
# invisible(dev.off())
#### Plot Enterobacterales #####
set.seed(456)
gg_ebac_rel_abd <- plot_organism_rel_abd(
  "Enterobacterales",
  label_y = c(0.20, 0.75, 0.985, 1.05)
)
# gg_ebac_rel_abd
# pdf(file = "./Results/Figure_1H.pdf", height = 6, width = 7)
# gg_ebac_rel_abd
# invisible(dev.off())
#### Plot Metaphlan Relative Abundance ####
# Figure 1A-MetaPhlAn4 Taxonomy
# Load legend: the taxonomy legend is a pre-rendered PDF wrapped as a plot.
tax_legend <- magick::image_read_pdf("./Data/legend.v2.pdf")
gg_tax_legend <- cowplot::ggdraw() + cowplot::draw_image(tax_legend)
metaphlan_pal2 <- getRdpPal(metaphlan_df2)
# Stacked bars of relative abundance, one bar per sample. Samples are ordered
# by Shannon diversity on the x axis and faceted by diversity group.
gg_metaphlan <- metaphlan_df2 %>%
  left_join(select(metaphlan_df_sumry, sampleID, diversity_group)) %>%
  ungroup() %>%
  # Freeze the current row order of the lineage labels as the factor order.
  mutate(Genus = factor(Genus, levels = unique(Genus))) %>%
  group_by(sampleID) %>%
  arrange(Genus) %>%
  ggplot(aes(x = reorder(sampleID, Shannon), y = pctseqs)) +
  geom_bar(stat = "identity", aes(fill = Genus), width = 0.9) +
  scale_fill_manual(values = metaphlan_pal2) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = eb(),
    axis.ticks.x = eb(),
    strip.text.x = et(angle = 0, size = 14),
    strip.background = eb(),
    axis.title.y = et(color = "black", size = 14),
    axis.text.y = et(color = "black", size = 12),
    panel.spacing = unit(0.5, "lines"),
    plot.margin = margin(t = 5, r = 5, b = 0, l = 5)
  ) +
  facet_grid(. ~diversity_group, scales = "free", space = "free") +
  scale_y_continuous(
    expand = expansion(mult = c(0.005, 0.005)),
    labels = scales::percent_format(accuracy = 1)
  ) +
  ylab("MetaPhlAn4 Relative Abundance") +
  xlab("")
# Color facets
# The facet strip titles are recoloured by editing the rendered gtable: the
# k-th strip found in the layout gets the k-th entry of diversity_group_colors.
# NOTE(review): assumes the strips appear in the same order as the colours and
# that each strip holds a "titleGrob" child at this nesting depth -- confirm
# against the installed ggplot2 version.
gg_metaphlan_grob <- ggplot_gtable(ggplot_build(gg_metaphlan))
strip_both <- grep("strip-", gg_metaphlan_grob$layout$name)
fills <- diversity_group_colors
k <- 1
for (i in strip_both) {
  # Position of the title grob among this strip's children.
  l <- grep("titleGrob", gg_metaphlan_grob$grobs[[i]]$grobs[[1]]$childrenOrder)
  gg_metaphlan_grob$grobs[[i]]$grobs[[1]]$children[[l]]$children[[1]]$gp$col <- fills[k]
  k <- k + 1
}
# Shannon diversity bar per sample, using the same sample ordering and facets
# as gg_metaphlan so the two panels can be stacked.
gg_shannon <- metaphlan_df_sumry %>%
  ggplot(aes(x = reorder(sampleID, Shannon), y = Shannon)) +
  geom_bar(stat = "identity", aes(fill = diversity_group_abv), width = 0.9) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = eb(),
    axis.title.x = eb(),
    axis.ticks.x = eb(),
    strip.text = eb(),
    strip.background = er(fill = "white"),
    axis.title.y = et(color = "black", size = 14),
    axis.text.y = et(color = "black", size = 12),
    panel.spacing = unit(0.5, "lines"),
    plot.margin = margin(t = 0, r = 5, b = 0, l = 5),
    panel.grid = eb()
  ) +
  scale_fill_manual(values = diversity_group_colors) +
  facet_grid(. ~diversity_group, scales = "free", space = "free")
# Figure 1A: taxonomy bars (with recoloured strips) stacked above the Shannon
# bars; the lower panel gets 15% of the upper panel's height.
gg_metaphlan_shannon <- plot_grid(
  gg_metaphlan_grob,
  gg_shannon,
  nrow = 2,
  rel_heights = c(1, 0.15),
  align = "hv",
  axis = "lb"
)
pdf(file = "./Results/Figure_1A.pdf", width = 12.25, height = 8)
gg_metaphlan_shannon
invisible(dev.off())
#### Combine all into a single figure 1 Start ####
# Two-row grid holding the five organism box plots plus the taxUMAP scatter.
# Only the Oscillospiraceae panel keeps its legend (placed on the right).
alpha_org_plot <- plot_grid(
  # gg_alpha_invsim + theme(legend.position = "none",
  # axis.text.x = eb(),
  # axis.title.x = eb(),
  # plot.margin = unit(c(0.1, 0, 0.15, 0), "cm")),
  # gg_alpha_shannon + theme(legend.position = "none",
  # axis.text.x = eb(),
  # axis.title.x = eb(),
  # plot.margin = unit(c(0.1, 0, 0.15, 0), "cm")),
  # gg_alpha_richness+ theme(legend.position = "none",
  # axis.text.x = eb(),
  # axis.title.x = eb(),
  # plot.margin = unit(c(0.1, 0, 0.15, 0), "cm")),
  gg_lach_rel_abd +
    theme(
      legend.position = "none",
      axis.text.x = et(angle = 45, hjust = 0.85, vjust = 0.85),
      axis.title.x = eb(),
      plot.margin = unit(c(0.05, 0, 0, 0), "cm")
    ),
  gg_bact_rel_abd +
    theme(
      legend.position = "none",
      axis.text.x = et(angle = 45, hjust = 0.85, vjust = 0.85),
      plot.margin = unit(c(0.05, 0, 0, 0), "cm")
    ),
  gg_oscl_rel_abd +
    theme(
      axis.text.x = et(angle = 45, hjust = 0.85, vjust = 0.85),
      plot.margin = unit(c(0.05, 0, 0, 0), "cm"),
      legend.position = "right"
    ),
  gg_ecoc_rel_abd +
    theme(
      legend.position = "none",
      axis.text.x = et(angle = 45, hjust = 0.85, vjust = 0.85),
      plot.margin = unit(c(0.05, 0, 0, 0), "cm")
    ),
  gg_ebac_rel_abd +
    theme(
      legend.position = "none",
      axis.text.x = et(angle = 45, hjust = 0.85, vjust = 0.85),
      plot.margin = unit(c(0.05, 0, 0, 0), "cm")
    ),
  tax_umap_mat_plot +
    theme(plot.margin = unit(c(0.05, 0, 0, 0), "cm")),
  nrow = 2,
  axis = "lb",
  align = "v",
  # The third column is wider than the first two.
  rel_widths = c(1, 1, 1.3),
  rel_heights = c(1, 1)
)
# This is useful to figure out the general layout of the plots:
# three numbered placeholder rectangles that can be arranged with `lay`.
gs <- lapply(seq_len(3), function(ii) {
  grobTree(rectGrob(gp = gpar(fill = ii, alpha = 0.5)), textGrob(ii))
})
# grid.arrange(grobs=gs, ncol=4,
# top="top label", bottom="bottom\nlabel",
# left="left label", right="right label")
# Layout matrix for Figure 1. Cell values index the grobs passed to
# grid.arrange(): 1 = taxonomy + Shannon panel, 2 = taxonomy legend,
# 3 = organism / taxUMAP grid; NA leaves the cell empty.
lay <- rbind(
  c(1, 1, 1, 1, 1, NA),
  c(1, 1, 1, 1, 1, 2),
  c(1, 1, 1, 1, 1, NA),
  c(3, 3, 3, 3, 3, 3),
  c(3, 3, 3, 3, 3, 3),
  c(3, 3, 3, 3, 3, 3),
  c(3, 3, 3, 3, 3, 3)
)
# grid.arrange(grobs = gs, layout_matrix = lay)
# Render the combined figure to PDF.
pdf(file = "./Results/Figure_1.pdf", height = 16, width = 20)
grid.arrange(
  gg_metaphlan_shannon, # 1
  gg_tax_legend, # 2
  # tax_umap_mat_plot +
  # theme(
  # legend.text = et(size = 10),
  # legend.title = et(size = 12),
  # plot.margin = margin(
  # t = 5, # Top margin
  # r = 0, # Right margin
  # b = 75, # Bottom margin
  # l = 5 # Left margin
  # )
  # ), # 2
  alpha_org_plot, # 3
  layout_matrix = lay
)
invisible(dev.off())
# Draw the combined Figure 1 on the current graphics device as well.
grid.arrange(
  gg_metaphlan_shannon, # 1
  gg_tax_legend, # 2
  # tax_umap_mat_plot +
  # theme(
  # legend.text = et(size = 10),
  # legend.title = et(size = 12),
  # plot.margin = margin(
  # t = 5, # Top margin
  # r = 0, # Right margin
  # b = 75, # Bottom margin
  # l = 5 # Left margin
  # )
  # ), # 2
  alpha_org_plot, # 3
  layout_matrix = lay
)
# The assignment below starts a new statement; it used to be fused onto the
# closing parenthesis of grid.arrange() above, which does not parse.
#
# Pathobiont bars for the non-healthy-donor samples: per sample, pctseqs is
# summed for taxa whose lineage mentions Enterococcus or Enterobacterales.
# Everything else is lumped into "Other" and its abundance is zeroed, so only
# the two pathobionts are drawn.
gg_metaphlan_pathos <- metaphlan_df2 %>%
  left_join(metaphlan_df_sumry %>% select(sampleID, diversity_group)) %>%
  # Species temporarily holds the whole lineage so one regex can hit any rank.
  mutate(Species = paste(Kingdom, Phylum, Class, Order, Family, Genus, Species, sep = "-")) %>%
  ungroup() %>%
  filter(diversity_group != "Healthy Donor") %>%
  mutate(Species = case_when(grepl(x = Species, pattern = "Enterococcus") ~ "Enterococcus",
                             grepl(x = Species, pattern = "Enterobacterales") ~ "Enterobacterales",
                             TRUE ~ "Other")) %>%
  group_by(sampleID, diversity_group, Shannon, Species) %>%
  summarise(pctseqs = sum(pctseqs)) %>%
  mutate(pctseqs = ifelse(Species == "Other", 0, pctseqs)) %>%
  ungroup() %>%
  arrange(Species) %>%
  mutate(Species = forcats::fct_relevel(Species)) %>%
  group_by(sampleID) %>%
  arrange(Species) %>% #distinct(sampleID)
  ggplot(aes(x = reorder(sampleID, Shannon), y = pctseqs)) +
  geom_bar(stat = "identity", aes(fill = Species), width = 0.9) +
  # The third colour is fully transparent and its break is the empty string.
  scale_fill_manual("Pathobiont",
                    values = c("#FF0000", "#0C7A3A", "#00000000"),
                    breaks = c("Enterobacterales", "Enterococcus", "")) +
  # paletteer::scale_fill_paletteer_d(palette = "vapeplot::vaporwave"
  # # "rcartocolor::Antique"
  # # "MetBrewer::Renoir"
  # ) +
  theme_bw() +
  theme(legend.position = "right",
        axis.text.x = eb(),
        axis.ticks.x = eb(),
        strip.text.x = et(angle = 0, size = 14),
        strip.background = eb(),
        axis.title.y = et(color = "black", size = 14),
        axis.text.y = et(color = "black", size = 12),
        panel.spacing = unit(0.5, "lines"),
        panel.grid.minor = eb(),
        panel.grid.major.y = eb()) +
  facet_grid(. ~diversity_group, scales = "free_x") +
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.005)),
                     labels = scales::percent_format(accuracy = 1)) +
  ylab("MetaPhlAn4 Relative Abundance") +
  xlab("")
# gg_metaphlan_pathos
#### Bile Acid Genes ####
# Wilcoxon tests of gene abundance (tpm) between diversity groups, run
# separately for each bile-acid gene. Restricted to rows whose patientID starts
# with "lt" and whose sample is present in metaphlan_df2.
ba_genes_wilcox_test <- ba_genes %>%
  filter(grepl("^lt", patientID)) %>%
  mutate(
    gene = factor(
      gene,
      levels = c(
        "BaiA", "BaiA1", "BaiA2",
        "BaiB", "BaiCD", "BaiE",
        "BaiF", "BaiG", "BaiH",
        "BaiI", "BSH",
        "3aHSDH", "3bHSDH",
        "7aHSDH", "7bHSDH",
        "12abHSDH",
        "5AR", "5BR"
      )
    )
  ) %>%
  filter(sampleID %in% metaphlan_df2$sampleID) %>%
  left_join(select(metaphlan_df_sumry, sampleID, Shannon, diversity_group)) %>%
  group_by(gene) %>%
  wilcox_test(tpm ~ diversity_group)
# Matching Wilcoxon effect sizes per gene.
# NOTE(review): unlike the test above, this pipeline does not filter patientID
# on "^lt", so it may include extra samples/groups -- confirm this is intended.
ba_genes_wilcox_effect <- ba_genes %>%
  mutate(
    gene = factor(
      gene,
      levels = c(
        "BaiA", "BaiA1", "BaiA2",
        "BaiB", "BaiCD", "BaiE",
        "BaiF", "BaiG", "BaiH",
        "BaiI", "BSH",
        "3aHSDH", "3bHSDH",
        "7aHSDH", "7bHSDH",
        "12abHSDH",
        "5AR", "5BR"
      )
    )
  ) %>%
  filter(sampleID %in% metaphlan_df2$sampleID) %>%
  left_join(select(metaphlan_df_sumry, sampleID, Shannon, diversity_group)) %>%
  group_by(gene) %>%
  wilcox_effsize(tpm ~ diversity_group)
# Bar chart of bile-acid gene abundance (TPM): one facet row per gene, one
# facet column per diversity group, samples ordered by Shannon diversity.
gg_ba_genes <- ba_genes %>%
  filter(grepl("^lt", patientID)) %>%
  mutate(
    gene = factor(
      gene,
      levels = c(
        "BaiA", "BaiA1", "BaiA2",
        "BaiB", "BaiCD", "BaiE",
        "BaiF", "BaiG", "BaiH",
        "BaiI", "BSH",
        "3aHSDH", "3bHSDH",
        "7aHSDH", "7bHSDH",
        "12abHSDH",
        "5AR", "5BR"
      )
    )
  ) %>%
  filter(sampleID %in% metaphlan_df2$sampleID) %>%
  left_join(select(metaphlan_df_sumry, sampleID, Shannon, diversity_group)) %>%
  ungroup() %>%
  ggplot(aes(x = reorder(sampleID, Shannon), y = tpm, fill = diversity_group)) +
  geom_col() +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = eb(),
    axis.ticks.x = eb(),
    strip.text.x = eb(),
    strip.text.y = et(angle = 0, size = 14, hjust = 0),
    strip.background = eb(),
    axis.title.y = et(color = "black", size = 14),
    axis.text.y = et(color = "black", size = 12),
    panel.spacing = unit(0.5, "lines"),
    plot.margin = margin(t = 5, r = 5, b = 0, l = 5)
  ) +
  scale_fill_manual(values = diversity_group_colors) +
  facet_grid(gene~diversity_group, scales = "free_x") +
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.005))) +
  ylab("Gene Abundance (TPM)") +
  xlab("")
# gg_ba_genes
# Taxonomy bars stacked above the gene panel (height ratio 1:8) in one PDF.
pdf(
  file = "./Results/Metaphlan_Bile_Acid.pdf",
  height = 26,
  width = 20,
  onefile = FALSE
)
gg.stack(
  gg_metaphlan,
  gg_ba_genes,
  heights = c(1, 8)
)
dev.off()
# Captured console output from the original notebook run:
## quartz_off_screen
## 2
# Stats
# One between-group comparison panel per bile-acid gene (TPM by diversity
# group). Tests are non-parametric, pairwise p-values are BH-adjusted, and only
# significant comparisons are drawn.
gg_bile_gene_stats <-
  ggstatsplot::grouped_ggbetweenstats(
    data = ba_genes %>%
      filter(grepl("^lt", patientID)) %>%
      mutate(
        gene = factor(
          gene,
          levels = c(
            "BaiA", "BaiA1", "BaiA2",
            "BaiB", "BaiCD", "BaiE",
            "BaiF", "BaiG", "BaiH",
            "BaiI", "BSH",
            "3aHSDH", "3bHSDH",
            "7aHSDH", "7bHSDH",
            "12abHSDH",
            "5AR", "5BR"
          )
        )
      ) %>%
      filter(sampleID %in% metaphlan_df2$sampleID) %>%
      left_join(
        metaphlan_df_sumry %>% select(sampleID, Shannon, diversity_group)
      ),
    x = diversity_group,
    y = tpm,
    grouping.var = gene,
    ggsignif.args = list(textsize = 4, tip_length = 0.01),
    p.adjust.method = "BH",
    type = "non-parametric",
    # pairwise.comparisons = FALSE,
    pairwise.display = "significant",
    results.subtitle = FALSE,
    # Apply the shared diversity-group palette to both fill and outline.
    ggplot.component = list(
      scale_fill_manual(values = diversity_group_colors),
      scale_color_manual(values = diversity_group_colors)
    ),
    plotgrid.args = list(nrow = 3),
    annotation.args = list(title = "Bile Acid Gene")
  ) # +
# geom_signif(comparisons = list(c("Medium Diversity", "High Diversity"),
#                                c("Medium Diversity", "Low Diversity")),
#             test = "wilcox.test")
# Write the per-gene statistics panels to PDF.
# The plot is printed explicitly: a bare object name is only auto-printed at
# the interactive top level (or by knitr), so under source()/Rscript the device
# would otherwise be closed with an empty page.
pdf(
  file = "./Results/Bile_Acid_Stats.pdf",
  height = 20,
  width = 40,
  onefile = FALSE
)
print(gg_bile_gene_stats)
dev.off()
## quartz_off_screen
## 2
# Bile Genes Heatmap
# Tile plot of log10(TPM) per gene (y) and sample (x, ordered by Shannon),
# faceted by diversity group. The gene factor is reversed so the first level
# is drawn at the top of the y-axis.
gg_ba_genes_heatmap <-
  ba_genes %>%
  filter(grepl("^lt", patientID)) %>%
  mutate(
    gene = factor(
      gene,
      levels = c(
        "BaiA", "BaiA1", "BaiA2",
        "BaiB", "BaiCD", "BaiE",
        "BaiF", "BaiG", "BaiH",
        "BaiI", "BSH",
        "3aHSDH", "3bHSDH",
        "7aHSDH", "7bHSDH",
        "12abHSDH",
        "5AR", "5BR"
      )
    ),
    gene = forcats::fct_rev(gene)
  ) %>%
  filter(sampleID %in% metaphlan_df2$sampleID) %>%
  left_join(
    metaphlan_df_sumry %>% select(sampleID, Shannon, diversity_group)
  ) %>%
  group_by(gene) %>%
  mutate(
    # Per-gene z-score (computed within the gene grouping; not mapped below).
    tpm_zscore = scale(tpm, scale = TRUE, center = TRUE),
    tpm_log10 = log(tpm, base = 10),
    # log10(0) is -Inf; such tiles are plotted as 0.
    tpm_log10 = ifelse(is.infinite(tpm_log10), 0, tpm_log10)
  ) %>%
  ggplot(aes(x = reorder(sampleID, Shannon), y = gene, fill = tpm_log10)) +
  geom_tile(stat = "identity", color = "gray") +
  theme_bw() +
  theme(
    panel.grid = eb(),
    plot.background = eb(),
    panel.border = er(colour = "black", fill = NA, linewidth = 0.5),
    axis.title = et(color = "black", size = 14),
    axis.text.y = et(color = "black", size = 12),
    axis.text.x = eb(),
    axis.ticks.x = eb(),
    strip.text.x = eb(),
    strip.text.y = et(angle = 0, size = 14, hjust = 0),
    strip.background = eb(),
    axis.line = el(color = "black")
  ) +
  facet_grid(. ~ diversity_group, scales = "free_x") +
  labs(x = "", y = "Gene\n") +
  guides(fill = guide_legend(title = "TPM", reverse = TRUE)) +
  scale_fill_gradient2(
    low = "white",
    mid = "cyan1",
    high = "#0c0970",
    midpoint = 3.5,
    breaks = seq(1, 7, by = 1),
    # labels = c("1", "2", "3", "4", "5", "6", "7"),
    # Legend keys are rendered as powers of ten.
    labels = scales::label_math(),
    limits = c(0, 7)
  ) +
  # scale_fill_fermenter(palette = "YlGnBu", direction = 1) +
  # scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0))
# Bar chart of how many bile-acid genes are detected (TPM > 0) in each sample,
# ordered by Shannon diversity and faceted by diversity group. The dashed line
# at 18 marks the total number of genes in the panel.
gg_ba_genes_bars <-
  ba_genes %>%
  filter(grepl("^lt", patientID)) %>%
  mutate(
    gene = factor(
      gene,
      levels = c(
        "BaiA", "BaiA1", "BaiA2",
        "BaiB", "BaiCD", "BaiE",
        "BaiF", "BaiG", "BaiH",
        "BaiI", "BSH",
        "3aHSDH", "3bHSDH",
        "7aHSDH", "7bHSDH",
        "12abHSDH",
        "5AR", "5BR"
      )
    )
  ) %>%
  filter(sampleID %in% metaphlan_df2$sampleID) %>%
  left_join(
    metaphlan_df_sumry %>% select(sampleID, Shannon, diversity_group)
  ) %>%
  # Presence/absence indicator per gene, then count per sample.
  mutate(pres_abs = ifelse(tpm > 0, 1, 0)) %>%
  group_by(sampleID, Shannon, diversity_group) %>%
  summarise(pres_abs = sum(pres_abs)) %>%
  # summarise(tot_pres_abs = sum(pres_abs)) %>%
  # mutate(pct_pres = tot_pres_abs / length(unique(ba_genes$gene))) %>%
  ggplot(
    aes(x = reorder(sampleID, Shannon), y = pres_abs, fill = diversity_group)
  ) +
  geom_col() +
  geom_hline(yintercept = 18, linetype = "dashed") +
  # annotate(geom = "text", x = 10, y = 18.5, label = "18 Genes Total") +
  theme_bw() +
  theme(
    panel.grid = eb(),
    plot.background = eb(),
    panel.border = er(colour = "black", fill = NA, linewidth = 0.5),
    axis.title = et(color = "black", size = 14),
    axis.text.y = et(color = "black", size = 12),
    axis.text.x = eb(),
    axis.ticks.x = eb(),
    strip.text.x = eb(),
    strip.text.y = et(angle = 0, size = 14, hjust = 0),
    strip.background = eb(),
    axis.line = el(color = "black"),
    legend.position = "none"
  ) +
  facet_grid(. ~ diversity_group, scales = "free_x") +
  scale_fill_manual(values = diversity_group_colors) +
  labs(x = "", y = "Genes Present (Total) \n") +
  guides(fill = guide_legend(title = "Diversity Group", reverse = TRUE)) +
  # scale_x_discrete(expand = c(0, 0)) +
  scale_y_continuous(
    expand = c(0, 0),
    breaks = c(seq(0, 15, 5), 18),
    limits = c(0, 20),
    labels = c("0", "5", "10", "15", "18 Total Genes")
  )
# Kruskal-Wallis test of the fraction of bile-acid genes detected per sample
# (TPM > 0, divided by the number of distinct genes in ba_genes) across
# diversity groups.
ba_genes_stats <- ba_genes %>%
  filter(grepl("^lt", patientID)) %>%
  mutate(
    gene = factor(
      gene,
      levels = c(
        "BaiA", "BaiA1", "BaiA2",
        "BaiB", "BaiCD", "BaiE",
        "BaiF", "BaiG", "BaiH",
        "BaiI", "BSH",
        "3aHSDH", "3bHSDH",
        "7aHSDH", "7bHSDH",
        "12abHSDH",
        "5AR", "5BR"
      )
    )
  ) %>%
  filter(sampleID %in% metaphlan_df2$sampleID) %>%
  left_join(
    metaphlan_df_sumry %>% select(sampleID, Shannon, diversity_group)
  ) %>%
  mutate(pres_abs = ifelse(tpm > 0, 1, 0)) %>%
  group_by(sampleID, Shannon, diversity_group) %>%
  summarise(tot_pres_abs = sum(pres_abs)) %>%
  mutate(pct_pres = tot_pres_abs / length(unique(ba_genes$gene))) %>%
  # Drop the remaining grouping so the test runs once over all samples.
  ungroup() %>%
  kruskal_test(pct_pres ~ diversity_group)
# wilcox_test(pct_pres ~ diversity_group, comparisons = list(
#   c("High Diversity", "Medium Diversity"),
#   c("High Diversity", "Low Diversity"),
#   c("Medium Diversity", "Low Diversity")
# ))
# Annotation table for the gene-presence violin plot: one row per diversity
# group. Only the "Low Diversity" row carries a label (the Kruskal-Wallis
# p-value) and a y-position; the other rows are NA.
genes_stats_anno <- data.frame(
  diversity_group = c("Low Diversity", "Medium Diversity", "High Diversity"),
  pct_pres = c(0.9, NA, NA),
  label = c(
    paste0("Kruskal-Wallis, p = ", scales::scientific(ba_genes_stats$p)),
    NA,
    NA
  )
) %>%
  mutate(
    diversity_group = factor(
      diversity_group,
      levels = c("Low Diversity", "Medium Diversity", "High Diversity")
    )
  )
# Violin + box + jitter plot of the fraction of bile-acid genes detected per
# sample (TPM > 0), one facet per diversity group, annotated with the
# Kruskal-Wallis label held in genes_stats_anno.
gg_ba_genes_box <-
  ba_genes %>%
  filter(grepl("^lt", patientID)) %>%
  mutate(
    gene = factor(
      gene,
      levels = c(
        "BaiA", "BaiA1", "BaiA2",
        "BaiB", "BaiCD", "BaiE",
        "BaiF", "BaiG", "BaiH",
        "BaiI", "BSH",
        "3aHSDH", "3bHSDH",
        "7aHSDH", "7bHSDH",
        "12abHSDH",
        "5AR", "5BR"
      )
    )
  ) %>%
  filter(sampleID %in% metaphlan_df2$sampleID) %>%
  left_join(
    metaphlan_df_sumry %>% select(sampleID, Shannon, diversity_group)
  ) %>%
  mutate(pres_abs = ifelse(tpm > 0, 1, 0)) %>%
  group_by(sampleID, Shannon, diversity_group) %>%
  summarise(tot_pres_abs = sum(pres_abs)) %>%
  mutate(pct_pres = tot_pres_abs / length(unique(ba_genes$gene))) %>%
  ggplot(aes(x = diversity_group, y = pct_pres, fill = diversity_group)) +
  geom_violin(trim = TRUE, alpha = 0.85) +
  geom_boxplot(alpha = 0.5, fill = "white", width = 0.4) +
  geom_jitter(
    color = "black",
    fill = "white",
    alpha = 0.5,
    shape = 21,
    size = 1.5,
    # Fixed seed keeps the jittered points reproducible between renders.
    position = position_jitter(width = 0.15, seed = 123)
  ) +
  geom_text(
    data = genes_stats_anno,
    label = genes_stats_anno$label,
    nudge_x = -0.35
  ) +
  theme_bw() +
  theme(
    panel.grid = eb(),
    plot.background = eb(),
    panel.border = er(colour = "black", fill = NA, linewidth = 0.5),
    axis.title = et(color = "black", size = 14),
    axis.text.y = et(color = "black", size = 12),
    axis.text.x = eb(),
    axis.ticks.x = eb(),
    strip.text.x = eb(),
    strip.text.y = et(angle = 0, size = 14, hjust = 0),
    strip.background = eb(),
    axis.line = el(color = "black"),
    legend.position = "right"
  ) +
  facet_grid(. ~ diversity_group, scales = "free", space = "free") +
  scale_fill_manual(values = diversity_group_colors) +
  labs(x = "", y = "Genes Present (%) \n") +
  guides(
    fill = guide_legend(title = "Diversity Group"),
    color = guide_legend(title = "Diversity Group")
  ) +
  scale_x_discrete(expand = expansion(mult = c(2, 2))) +
  scale_y_continuous(
    breaks = seq(0, 1, 0.25),
    expand = expansion(mult = c(0.01, 0.03)),
    labels = c("0%", "25%", "50%", "75%", "100%")
  )
# pdf(file = "./Results/Figure_5.pdf", height = 14, width = 20, onefile = FALSE)
# gg.stack(gg_metaphlan_family,
# # gg_metaphlan,
# gg_ba_genes_heatmap,
# gg_ba_genes_bars,
# gg_ba_genes_box,
# heights = c(0.2, 0.5, 0.2, 0.2),
# gap = 2)
# dev.off()
# Write Figure 4 to PDF: four panels stacked vertically with patchwork.
# Each panel gets a slightly negative bottom margin and is separated by a
# spacer with a negative relative height, which pulls adjacent panels together.
# The composition is printed explicitly: a bare expression is only auto-printed
# at the interactive top level (or by knitr), so under source()/Rscript the
# device would otherwise be closed with an empty page.
pdf(file = "./Results/Figure_4.pdf", height = 10, width = 16, onefile = FALSE)
print(
  (
    (gg_metaphlan_pathos +
      theme(plot.margin = margin(t = 0, r = 5, b = -5, l = 5, "pt"))) /
      patchwork::plot_spacer() /
      (gg_ba_genes_heatmap +
        theme(plot.margin = margin(t = 0, r = 5, b = -5, l = 5, "pt"))) /
      patchwork::plot_spacer() /
      (gg_ba_genes_bars +
        theme(plot.margin = margin(t = 0, r = 5, b = -5, l = 5, "pt"))) /
      patchwork::plot_spacer() /
      (gg_ba_genes_box +
        theme(plot.margin = margin(t = 0, r = 5, b = -5, l = 5, "pt")))
  ) +
    patchwork::plot_layout(
      guides = "collect",
      heights = c(
        0.15,
        -0.03, # Spacer
        0.6,
        -0.03, # Spacer
        0.3,
        -0.03, # Spacer
        0.3
      )
    )
)
invisible(dev.off())
# Show the same four-panel composition inline (the value of the braced
# expression is printed at top level).
{
  (
    (gg_metaphlan_pathos +
      theme(plot.margin = margin(t = 0, r = 5, b = -5, l = 5, "pt"))) /
      patchwork::plot_spacer() /
      (gg_ba_genes_heatmap +
        theme(plot.margin = margin(t = 0, r = 5, b = -5, l = 5, "pt"))) /
      patchwork::plot_spacer() /
      (gg_ba_genes_bars +
        theme(plot.margin = margin(t = 0, r = 5, b = -5, l = 5, "pt"))) /
      patchwork::plot_spacer() /
      (gg_ba_genes_box +
        theme(plot.margin = margin(t = 0, r = 5, b = -5, l = 5, "pt")))
  ) +
    patchwork::plot_layout(
      guides = "collect",
      heights = c(
        0.15,
        -0.03, # Spacer
        0.6,
        -0.03, # Spacer
        0.3,
        -0.03, # Spacer
        0.3
      )
    )
}

# Long-format table behind the metabolite heatmap: one row per sample and
# compound, carrying the log2 fold change of the measured value relative to
# the per-compound median, plus db / diversity group / Shannon for ordering.
# NOTE: the assignment used to sit on the same line as the closing brace above
# ("}heatmap_data <-"), which does not parse; it now starts on its own line.
heatmap_data <-
  t_metaphlan %>%
  drop_na(taxid) %>%
  select(sampleID) %>%
  # Keep one row per sample before attaching the metabolite measurements.
  group_by(sampleID) %>%
  dplyr::slice(1) %>%
  mutate(
    db = ifelse(
      grepl(sampleID, pattern = "lt"),
      "Liver Transplant",
      "Healthy Donor"
    )
  ) %>%
  left_join(metab_qual_anon) %>%
  # Harmonise compound names with the lookup tables.
  mutate(
    compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
    compound = str_to_title(compound),
    compound = recode(compound, Preq1 = "PreQ1")
  ) %>%
  # NA compounds are kept so samples without measurements are not dropped.
  filter(compound %in% heatmap_cmpds$compound | is.na(compound)) %>%
  group_by(compound) %>%
  mutate(
    # Reference value per compound: the median, or a tenth of the smallest
    # positive value when the median is exactly zero.
    median_val = ifelse(
      median(mvalue, na.rm = TRUE) == 0,
      min(mvalue[mvalue > 0], na.rm = TRUE) / 10,
      median(mvalue, na.rm = TRUE)
    ),
    # log2 fold change vs. the reference; a measured zero (-Inf) is set to 0.
    heatmap_val = ifelse(
      log(mvalue / median_val, base = 2) == -Inf,
      0,
      log(mvalue / median_val, base = 2)
    )
  ) %>%
  ungroup() %>%
  select(-c(mvalue, median_val)) %>%
  # One value per sample/compound: keep the largest fold change.
  group_by(sampleID, compound) %>%
  slice_max(heatmap_val, with_ties = FALSE, n = 1) %>%
  ungroup() %>%
  # Replace the provisional db with the one recorded in the summary table.
  select(-db) %>%
  left_join(
    metaphlan_df2 %>%
      left_join(
        metaphlan_df_sumry %>% select(sampleID, db, diversity_group_abv)
      ) %>%
      distinct(sampleID, db, diversity_group_abv),
    by = "sampleID"
  ) %>%
  # De-duplicate again in case the join above fanned rows out.
  group_by(sampleID, compound) %>%
  slice_max(heatmap_val, with_ties = FALSE, n = 1) %>%
  left_join(alpha_shannon) %>%
  group_by(db) %>%
  arrange(db, Shannon) %>%
  ungroup()
# Stats
# Per-compound Kruskal-Wallis test of the heatmap value across diversity
# groups, excluding the "Healthy Donor" group, with BH-adjusted p-values.
heatmap_pvals <-
  heatmap_data %>%
  filter(diversity_group_abv != "Healthy Donor") %>%
  drop_na(compound) %>%
  group_by(compound) %>%
  rstatix::kruskal_test(heatmap_val ~ diversity_group_abv) %>%
  rstatix::adjust_pvalue(method = "BH") %>%
  select(compound, statistic, p, p.adj)
# One-column table of diversity-group labels, one row per sample, ordered by
# db and then Shannon. It is later passed as the column split of the
# metabolite heatmap.
heatmap_labels <-
  heatmap_data %>%
  mutate(compound = str_to_title(compound)) %>%
  left_join(heatmap_lookup) %>%
  left_join(heatmap_pvals %>% group_by(compound, p.adj, p) %>% slice(1)) %>%
  # A second arrange(class, subclass) used to follow; it was a no-op because
  # the rows are already sorted by those keys here.
  arrange(class, subclass, p) %>%
  pivot_wider(
    # id_cols is named explicitly: passing it by position is deprecated in
    # tidyr and differs from the other pivot_wider() calls in this script.
    id_cols = c(diversity_group_abv, sampleID, db, Shannon),
    names_from = "compound",
    values_from = "heatmap_val",
    values_fn = mean
  ) %>%
  group_by(db) %>%
  arrange(db, Shannon) %>%
  ungroup() %>%
  distinct(sampleID, .keep_all = TRUE) %>%
  select(diversity_group_abv) %>%
  mutate(
    diversity_group_abv = as.character(diversity_group_abv),
    # "Low" -> "Low Diversity" etc.; the healthy-donor label is left as is.
    diversity_group_abv = ifelse(
      grepl(pattern = "Healthy", as.character(diversity_group_abv)),
      diversity_group_abv,
      paste(diversity_group_abv, "Diversity")
    ),
    diversity_group_abv = factor(
      diversity_group_abv,
      levels = c(
        "Low Diversity", "Medium Diversity",
        "High Diversity", "Healthy Donor"
      )
    )
  )
# Build heatmap compound order (row order) and compound class (row slice).
# Several lookup classes/subclasses are collapsed into broader display groups,
# then compounds are sorted by class, subclass, raw p-value and name.
heatmap_order <-
  heatmap_data %>%
  ungroup() %>%
  filter(compound %in% heatmap_cmpds$compound) %>%
  distinct(compound) %>%
  drop_na() %>%
  left_join(heatmap_lookup) %>%
  mutate(
    class = case_when(
      class %in% c("Dicarboxylic Acid") ~ "Fatty Acid",
      class %in% c("Phenolic Aromatic", "Kynurine Pathway") ~ "Add. Cmpds.",
      TRUE ~ class
    ),
    subclass = case_when(
      subclass %in% c(
        "Branched-Chain Fatty Acid",
        "Aminated Fatty Acid",
        "Long-Chain Fatty Acid",
        "Dicarboxylic Acid"
      ) ~ "Other Fatty Acid",
      subclass == "Indole" ~ "Tryptophan Metabs.",
      subclass %in% c("Phenolic Aromatic", "Kynurine Pathway") ~ "Add. Cmpds.",
      TRUE ~ subclass
    )
  ) %>%
  left_join(heatmap_pvals %>% group_by(compound, p.adj, p) %>% slice(1)) %>%
  ungroup() %>%
  mutate(
    class = factor(
      class,
      levels = c(
        "Fatty Acid",  # 1
        # "Dicarboxylic Acid",
        "Amino Acid",  # 2
        "Bile Acid",   # 3
        "Indole",      # 4
        "Add. Cmpds.", # 5
        # "Phenolic Aromatic",
        # "Kynurine Pathway",
        "Vitamin"      # 6
      )
    ),
    subclass = factor(
      subclass,
      levels = c(
        "Short-Chain Fatty Acid", # 1
        "Other Fatty Acid",       # 2
        "Amino Acid",             # 3
        # "Aminated Fatty Acid",
        # "Long-Chain Fatty Acid",
        # "Dicarboxylic Acid",
        "Primary Bile Acid",      # 4
        "Secondary Bile Acid",    # 5
        "Conjugated Bile Acid",   # 6
        "Tryptophan Metabs.",     # 7
        # "Phenolic Aromatic",
        # "Kynurine Pathway",
        "Vitamin",                # 8
        "Add. Cmpds."             # 9
      )
    )
  ) %>%
  arrange(class, subclass, p, compound) %>%
  select(class, subclass, p, compound)
# Build heatmap patient order following same order as gg_metaphlan plot:
# one row per patient, sorted by db and then Shannon, returned as a vector of
# patient IDs.
heatmap_column_order <-
  heatmap_data %>%
  group_by(patientID) %>%
  dplyr::slice(1) %>%
  left_join(alpha_shannon) %>%
  group_by(db) %>%
  arrange(db, Shannon) %>%
  select(patientID) %>%
  distinct(patientID) %>%
  pull(patientID)
# P-value legend color: linear ramp from green at 0 to light grey at 0.045.
# Assigned with `<-` to match the other assignments in this script.
pvalue_col_fun <- colorRamp2(c(0, 0.045), c("#75C236", "#E3E4E6"))
# Create heatmap for adjusted p-values: a single narrow column with one cell
# per compound, coloured by the BH-adjusted Kruskal-Wallis p-value. Rows are
# ordered and split using heatmap_order so they line up with the main heatmap.
pvalue_adj <-
  heatmap_pvals %>%
  group_by(compound, p.adj, p) %>%
  dplyr::slice(1) %>%
  ungroup() %>%
  left_join(heatmap_lookup) %>%
  # Same class/subclass collapsing as used for heatmap_order.
  mutate(
    class = case_when(
      class %in% c("Dicarboxylic Acid") ~ "Fatty Acid",
      class %in% c("Phenolic Aromatic", "Kynurine Pathway") ~ "Add. Cmpds.",
      TRUE ~ class
    ),
    subclass = case_when(
      subclass %in% c(
        "Branched-Chain Fatty Acid",
        "Aminated Fatty Acid",
        "Long-Chain Fatty Acid",
        "Dicarboxylic Acid"
      ) ~ "Other Fatty Acid",
      subclass == "Indole" ~ "Tryptophan Metabs.",
      subclass %in% c("Phenolic Aromatic", "Kynurine Pathway") ~ "Add. Cmpds.",
      TRUE ~ subclass
    )
  ) %>%
  left_join(heatmap_pvals %>% group_by(compound, p.adj, p) %>% slice(1)) %>%
  ungroup() %>%
  mutate(
    class = factor(
      class,
      levels = c(
        "Fatty Acid",  # 1
        # "Dicarboxylic Acid",
        "Amino Acid",  # 2
        "Bile Acid",   # 3
        "Indole",      # 4
        "Add. Cmpds.", # 5
        # "Phenolic Aromatic",
        # "Kynurine Pathway",
        "Vitamin"      # 6
      )
    ),
    subclass = factor(
      subclass,
      levels = c(
        "Short-Chain Fatty Acid", # 1
        "Other Fatty Acid",       # 2
        "Amino Acid",             # 3
        # "Aminated Fatty Acid",
        # "Long-Chain Fatty Acid",
        # "Dicarboxylic Acid",
        "Primary Bile Acid",      # 4
        "Secondary Bile Acid",    # 5
        "Conjugated Bile Acid",   # 6
        "Tryptophan Metabs.",     # 7
        # "Phenolic Aromatic",
        # "Kynurine Pathway",
        "Vitamin",                # 8
        "Add. Cmpds."             # 9
      )
    )
  ) %>%
  arrange(class, subclass, p.adj, compound) %>%
  select(class, subclass, p.adj, compound) %>%
  # Compounds become row names; the only remaining column is the p-value.
  column_to_rownames(var = "compound") %>%
  select(`Adjusted p-value` = p.adj) %>%
  as.matrix() %>%
  Heatmap(
    name = "Significance (adjusted p-value)",
    cluster_rows = FALSE,
    cluster_columns = FALSE,
    col = pvalue_col_fun,
    column_title = "KW Test",
    column_title_side = "bottom",
    column_title_rot = 90,
    show_column_names = FALSE,
    column_names_side = "top",
    rect_gp = gpar(col = "black", lwd = 0.2),
    row_gap = unit(3.5, "mm"),
    row_names_side = "left",
    row_order = heatmap_order$compound,
    row_split = heatmap_order$subclass,
    row_names_gp = gpar(fontsize = 19), # Compounds on y-axis
    show_row_names = TRUE,
    row_title_rot = 0,
    # Font size 0 hides the slice titles on this strip.
    row_title_gp = gpar(fontsize = 0),
    heatmap_legend_param = list(at = seq(0.05, 0, -0.01)),
    width = unit(0.1, "in")
  )
# Heatmap legend color: diverging ramp for the log2 fold change, teal at -5,
# white at 0 and crimson at +5.
col_fun <- colorRamp2(
  breaks = c(-5, 0, 5),
  colors = c("#00aaad", "white", "#ad003a")
)
# Global parameter for annotation
# ht_opt$COLUMN_ANNO_PADDING <- unit(2.5, "mm")
# Main metabolite heatmap: compounds in rows, patients in columns, cells
# coloured by log2 fold change (heatmap_val). Rows are ordered/split with
# heatmap_order, columns are ordered with heatmap_column_order and split by
# the diversity-group labels in heatmap_labels.
gg_metab_heatmap <-
  heatmap_data %>%
  mutate(
    compound = str_to_title(compound),
    # str_to_title() lower-cases the trailing "Q"; restore the proper name.
    compound = recode(compound, Preq1 = "PreQ1")
  ) %>%
  left_join(heatmap_lookup) %>%
  left_join(
    heatmap_pvals %>%
      group_by(compound, p.adj, p) %>%
      slice(1) %>%
      select(compound, p.adj, p),
    by = "compound"
  ) %>%
  # Same class/subclass collapsing as used for heatmap_order.
  mutate(
    class = case_when(
      class %in% c("Dicarboxylic Acid") ~ "Fatty Acid",
      class %in% c("Phenolic Aromatic", "Kynurine Pathway") ~ "Add. Cmpds.",
      TRUE ~ class
    ),
    subclass = case_when(
      subclass %in% c(
        "Branched-Chain Fatty Acid",
        "Aminated Fatty Acid",
        "Long-Chain Fatty Acid",
        "Dicarboxylic Acid"
      ) ~ "Other Fatty Acid",
      subclass == "Indole" ~ "Tryptophan Metabs.",
      subclass %in% c("Phenolic Aromatic", "Kynurine Pathway") ~ "Add. Cmpds.",
      TRUE ~ subclass
    )
  ) %>%
  left_join(heatmap_pvals %>% group_by(compound, p.adj, p) %>% slice(1)) %>%
  ungroup() %>%
  mutate(
    class = factor(
      class,
      levels = c(
        "Fatty Acid",  # 1
        # "Dicarboxylic Acid",
        "Amino Acid",  # 2
        "Bile Acid",   # 3
        "Indole",      # 4
        "Add. Cmpds.", # 5
        # "Phenolic Aromatic",
        # "Kynurine Pathway",
        "Vitamin"      # 6
      )
    ),
    subclass = factor(
      subclass,
      levels = c(
        "Short-Chain Fatty Acid", # 1
        "Other Fatty Acid",       # 2
        "Amino Acid",             # 3
        # "Aminated Fatty Acid",
        # "Long-Chain Fatty Acid",
        # "Dicarboxylic Acid",
        "Primary Bile Acid",      # 4
        "Secondary Bile Acid",    # 5
        "Conjugated Bile Acid",   # 6
        "Tryptophan Metabs.",     # 7
        # "Phenolic Aromatic",
        # "Kynurine Pathway",
        "Vitamin",                # 8
        "Add. Cmpds."             # 9
      )
    )
  ) %>%
  mutate(
    diversity_group_abv = as.character(diversity_group_abv),
    # "Low" -> "Low Diversity" etc.; the healthy-donor label is left as is.
    diversity_group_abv = ifelse(
      grepl(pattern = "Healthy", as.character(diversity_group_abv)),
      diversity_group_abv,
      paste(diversity_group_abv, "Diversity")
    ),
    diversity_group_abv = factor(
      diversity_group_abv,
      levels = c(
        "Low Diversity", "Medium Diversity",
        "High Diversity", "Healthy Donor"
      )
    )
  ) %>%
  # Row order here fixes the column order produced by pivot_wider() below.
  arrange(class, subclass, p.adj, compound) %>%
  pivot_wider(
    # id_cols is named explicitly: passing it by position is deprecated in
    # tidyr and differs from the other pivot_wider() calls in this script.
    id_cols = c(diversity_group_abv, db, patientID, Shannon),
    names_from = compound,
    values_from = heatmap_val
  ) %>%
  group_by(patientID) %>%
  arrange(patientID) %>%
  ungroup() %>%
  # Rows with a missing compound produce a column literally named "NA".
  select(-`NA`) %>%
  distinct(patientID, .keep_all = TRUE) %>%
  group_by(diversity_group_abv) %>%
  arrange(db, Shannon) %>%
  ungroup() %>%
  select(-Shannon, -db, -diversity_group_abv) %>%
  column_to_rownames("patientID") %>%
  as.matrix() %>%
  # Transpose so compounds are rows and patients are columns.
  t() %>%
  Heatmap(
    name = "Fold Change (log2)",
    col = col_fun,
    na_col = "grey83",
    rect_gp = gpar(col = "grey40", lwd = 1.5),
    # column_names_gp = grid::gpar(fontsize = 16),
    column_gap = unit(2.5, "mm"),
    column_split = heatmap_labels,
    column_order = heatmap_column_order,
    column_title_gp = gpar(fontsize = 20), # Diversity group labels on top of plot
    column_title_rot = 0,
    cluster_columns = FALSE,
    show_column_names = FALSE,
    show_column_dend = FALSE,
    row_names_gp = gpar(fontsize = 16), # Compounds on y-axis
    row_title_gp = gpar(fontsize = 18), # Compound classes on y-axis
    row_title_side = "right", # Place the compound classes on right side of y-axis
    row_gap = unit(3.5, "mm"),
    row_names_side = c("left"),
    row_order = heatmap_order$compound,
    row_split = heatmap_order$subclass,
    show_row_names = TRUE,
    row_title_rot = 0,
    cluster_rows = FALSE,
    show_row_dend = FALSE,
    heatmap_height = unit(24, "in"),
    heatmap_width = unit(16.5, "in")
  )
# Place the adjusted p-value strip to the left of the main heatmap and write
# the combined heatmap list to PDF.
gg_metab_heatmap_tot <- pvalue_adj + gg_metab_heatmap
# gg_metab_heatmap_tot
pdf(file = "./Results/Figure_2.pdf", height = 24, width = 24, onefile = FALSE)
# Draw explicitly: a heatmap list is only rendered automatically when it is
# auto-printed at the interactive top level (or by knitr), so under
# source()/Rscript the device would otherwise be closed with an empty page.
ComplexHeatmap::draw(gg_metab_heatmap_tot)
invisible(dev.off())
# Conversions of compounds
# Lookup table of molar masses (g/mol) for the quantified compounds of
# interest, used to convert mass concentrations to molar concentrations.
# Each compound is written next to its mass so the pairing is easy to check.
metab_conversions <- local({
  molar_mass <- c(
    "Taurocholic Acid"        = 515.7,
    "Glycocholic Acid"        = 465.6,
    "Cholic Acid"             = 408.6,
    "3-Oxolithocholic Acid"   = 374.6,
    "Alloisolithocholic Acid" = 376.6,
    "Deoxycholic Acid"        = 392.6,
    "Isodeoxycholic Acid"     = 392.6,
    "Lithocholic Acid"        = 376.6,
    "Kynurenic Acid"          = 189.17,
    "Kynurenine"              = 208.21,
    "Anthranilic Acid"        = 137.14,
    "Desaminotyrosine"        = 166.17,
    "Niacin"                  = 123.11,
    "Tyrosine"                = 181.19,
    "Tryptamine"              = 160.22,
    "Tryptophan"              = 204.22,
    "Phenylalanine"           = 165.19,
    "Acetate"                 = 59.04,
    "Butyrate"                = 87.1,
    "Propionate"              = 73.07,
    "Succinate"               = 116.07
  )
  data.frame(
    compound = names(molar_mass),
    molar_mass__gmol = unname(molar_mass)
  )
})
# Convert the quantitative metabolite values to a common unit (mM).
# The source unit is assigned per compound: the listed bile acids are treated
# as ug/mL, the four short-chain fatty acids as mM, everything else as uM.
metab_quant_converted <- metab_quant_anon %>%
  mutate(
    compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
    compound = str_to_title(compound)
  ) %>%
  mutate(
    units = case_when(
      compound %in% c(
        "Taurocholic Acid",
        "Glycocholic Acid",
        "Cholic Acid",
        "3-Oxolithocholic Acid",
        "Alloisolithocholic Acid",
        "Deoxycholic Acid",
        "Isodeoxycholic Acid",
        "Lithocholic Acid"
      ) ~ "ugmL",
      compound %in% c("Acetate", "Butyrate", "Succinate", "Propionate") ~ "mM",
      TRUE ~ "uM"
    )
  ) %>%
  # right_join to only keep compounds we're interested in
  right_join(metab_conversions, by = "compound") %>%
  mutate(
    mvalue__mM = case_when(
      # ug/mL -> mM: x 1000 mL/L, / 1e6 ug/g, / molar mass (g/mol),
      # x 1000 mM per M.
      units == "ugmL" ~ (
        mvalue *
          ((1000 / 1000000) / molar_mass__gmol) *
          1000
      ),
      # uM -> mM: 1000 uM per mM.
      units == "uM" ~ mvalue / 1000,
      # Already in mM.
      TRUE ~ mvalue
    )
  ) %>%
  select(sampleID, compound, mvalue__mM)
# Data for the metabolite concentration box plots: one row per sample and
# compound (mM), labelled with a compound class and a display name that
# carries the class abbreviation. Only compounds measured in all four
# diversity groups are kept, and the final filter narrows the table to nine
# short-chain fatty acids and bile acids.
metab_boxplot <-
  metaphlan_df2 %>%
  left_join(metaphlan_df_sumry) %>%
  drop_na(taxid) %>%
  select(sampleID, diversity_group_abv, db) %>%
  # One row per sample before attaching the concentrations.
  group_by(sampleID) %>%
  dplyr::slice(1) %>%
  left_join(metab_quant_converted) %>%
  mutate(
    compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
    compound = str_to_title(compound)
  ) %>%
  ungroup() %>%
  mutate(
    # Anything not listed explicitly falls through to "Indole".
    class = case_when(
      compound %in% c("Taurocholic Acid", "Glycocholic Acid") ~
        "Conjugated Primary Bile Acid",
      compound %in% c("Cholic Acid") ~ "Primary Bile Acid",
      compound %in% c(
        "3-Oxolithocholic Acid", "Alloisolithocholic Acid",
        "Deoxycholic Acid", "Isodeoxycholic Acid",
        "Lithocholic Acid"
      ) ~ "Secondary Bile Acid",
      compound %in% c(
        "Threonine", "Glycine", "Tyrosine", "Tyramine", "Serine",
        "Leucine", "Isoleucine", "Valine", "Phenylalanine", "Alanine",
        "Proline", "Aspartate", "Methionine", "Glutamate", "Lysine",
        "Cysteine", "Tryptophan"
      ) ~ "Amino Acid",
      compound %in% c("Acetate", "Butyrate", "Succinate", "Propionate") ~
        "Short-Chain Fatty Acid",
      compound %in% c(
        "Kynurenic Acid", "Anthranilic Acid", "Kynurenine", "Tryptamine"
      ) ~ "Kynurenine Metabolite",
      compound == "Desaminotyrosine" ~ "Phenolic Aromatic",
      compound == "Niacin" ~ "B-Vitamin",
      TRUE ~ "Indole"
    ),
    # Display name = compound name followed by the class abbreviation.
    compound = case_when(
      class == "Conjugated Primary Bile Acid" ~ paste(compound, "(1ËšConj. BA)"),
      class == "Primary Bile Acid" ~ paste(compound, "(1Ëš BA)"),
      class == "Secondary Bile Acid" ~ paste(compound, "(2Ëš BA)"),
      class == "Short-Chain Fatty Acid" ~ paste(compound, "(SCFA)"),
      class == "Amino Acid" ~ paste(compound, "(AA)"),
      class == "Phenolic Aromatic" ~ paste(compound, "(Phen. Arom.)"),
      class == "Indole" ~ paste0(compound, "(Indole)"),
      class == "Kynurenine Metabolite" ~ paste(compound, "(Kyn. Metab.)"),
      class == "B-Vitamin" ~ paste(compound, "(B-Vitamin)")
    )
  ) %>%
  drop_na() %>%
  # Keep compounds that were observed in all four diversity groups.
  group_by(compound) %>%
  mutate(count = length(unique(diversity_group_abv))) %>%
  filter(count == 4) %>%
  select(-count) %>%
  mutate(
    # Display names missing from this list become NA.
    compound = factor(
      compound,
      levels = c(
        "Acetate (SCFA)", # 1
        "Propionate (SCFA)", # 2
        "Butyrate (SCFA)", # 3
        "Succinate (SCFA)", # 4
        "Tyrosine (AA)", # 5
        "Tryptophan (AA)", # 6
        "Phenylalanine (AA)", # 7
        "Cholic Acid (1Ëš BA)", # 8
        "Glycocholic Acid (1ËšConj. BA)", # 9
        "Taurocholic Acid (1ËšConj. BA)", # 10
        "Deoxycholic Acid (2Ëš BA)", # 11
        "Lithocholic Acid (2Ëš BA)", # 12
        "Isodeoxycholic Acid (2Ëš BA)", # 13
        "3-Oxolithocholic Acid (2Ëš BA)", # 14
        "Alloisolithocholic Acid (2Ëš BA)", # 15
        "Desaminotyrosine (Phen. Arom.)", # 16
        "Kynurenine (Kyn. Metab.)", # 17
        "Anthranilic Acid (Kyn. Metab.)", # 18
        "Tryptamine (Kyn. Metab.)", # 19
        "Niacin (B-Vitamin)" # 20
      )
    ),
    class = factor(
      class,
      levels = c(
        "Short-Chain Fatty Acid", # 1
        "Amino Acid", # 2
        "Primary Bile Acid", # 3
        "Conjugated Primary Bile Acid", # 4
        "Secondary Bile Acid", # 5
        "Indole", # 6
        "Phenolic Aromatic", # 7
        "Kynurenine Metabolite", # 8
        "B-Vitamin" # 9
      )
    )
  ) %>%
  # Rows whose compound became NA above compare as NA here and are dropped.
  filter(compound != "Kynurenic Acid") %>%
  mutate(
    db = factor(db, levels = c("Liver Transplant", "Healthy Donor")),
    diversity_group_abv = factor(
      diversity_group_abv,
      levels = c("Low", "Medium", "High", "Healthy Donor")
    )
  ) %>%
  # Final selection of compounds carried into the plots.
  filter(
    compound %in% c(
      "Acetate (SCFA)",
      "Butyrate (SCFA)",
      "Propionate (SCFA)",
      "Glycocholic Acid (1ËšConj. BA)",
      "Taurocholic Acid (1ËšConj. BA)",
      "Cholic Acid (1Ëš BA)",
      "Deoxycholic Acid (2Ëš BA)",
      "Lithocholic Acid (2Ëš BA)",
      "Alloisolithocholic Acid (2Ëš BA)"
    )
  )
# Pairwise two-sided Wilcoxon tests of concentration between diversity groups
# for every compound. Raw p-values are BH-adjusted across the whole table,
# formatted as text, and given fixed bracket heights for plotting.
metab_boxplot_stats <-
  metab_boxplot %>%
  group_by(class, compound) %>%
  rstatix::wilcox_test(
    mvalue__mM ~ diversity_group_abv,
    comparisons = diversity_comps,
    # No per-compound adjustment here; BH is applied in the next step.
    p.adjust.method = "none",
    alternative = "two.sided"
  ) %>%
  rstatix::adjust_pvalue(method = "BH") %>%
  rstatix::add_significance("p.adj") %>%
  mutate(
    p.adj = ifelse(
      p.adj < 0.001,
      "p.adj < 0.001",
      paste("p.adj = ", round(p.adj, 2))
    )
  ) %>%
  # Set stats brackets to fixed points since the y-scale will be log10
  # transformed.
  mutate(
    y.position = case_when(
      group1 == "High" & group2 == "Healthy Donor" ~ 2.30,
      group1 == "Medium" & group2 == "High" ~ 2.7,
      group1 == "Low" & group2 == "High" ~ 3.1,
      group1 == "Low" & group2 == "Medium" ~ 3.5
    )
  )
# Summary statistics
# gtsummary table of each compound's concentration (mM) by diversity group.
# The data are reshaped to one row per sample and one column per compound,
# and three summary lines are reported per compound.
metab_boxplot_summay_stats <-
  tbl_summary(
    metab_boxplot %>%
      ungroup() %>%
      select(sampleID, diversity_group_abv, compound, mvalue__mM) %>%
      pivot_wider(
        id_cols = c(sampleID, diversity_group_abv),
        names_from = "compound",
        values_from = "mvalue__mM"
      ) %>%
      # Move sampleID out of the columns so it is not summarised.
      column_to_rownames(var = "sampleID"),
    by = diversity_group_abv,
    type = all_continuous() ~ "continuous2",
    statistic = all_continuous() ~ c(
      "{mean} ({sd})",
      "{median} ({p25}, {p75})",
      "{min}-{max}"
    ),
    # Three significant digits in scientific notation.
    digits = all_continuous() ~ function(x) {
      format(x, digits = 3, scientific = TRUE)
    }
  ) %>%
  bold_labels() %>%
  italicize_levels()
# Display the summary table with a caption. The rendered table that follows
# is notebook output; it was fused onto the end of this statement and is
# moved back onto its own line.
metab_boxplot_summay_stats %>%
  gtsummary::modify_caption("**Diversity Group Summary Statistics**")

| Characteristic | Low, N = 36 | Medium, N = 39 | High, N = 26 | Healthy Donor, N = 21 |
|---|---|---|---|---|
| Acetate (SCFA) | | | | |
| &nbsp;&nbsp;&nbsp;&nbsp;Mean (SD) | 6.54e+00 (9.29e+00) | 2.12e+01 (2.46e+01) | 3.33e+01 (2.40e+01) | 4.82e+01 (1.85e+01) |
| &nbsp;&nbsp;&nbsp;&nbsp;Median (IQR) | 1.63e+00 (3.38e-01, 1.16e+01) | 1.29e+01 (2.02e+00, 4.14e+01) | 3.13e+01 (1.51e+01, 4.62e+01) | 4.93e+01 (2.95e+01, 6.31e+01) |
| &nbsp;&nbsp;&nbsp;&nbsp;Minimum-Maximum | 0.0e+00-4.81e+01 | 0.0e+00-1.14e+02 | 6.5e-01-8.62e+01 | 2.0e+01-7.61e+01 |
| Alloisolithocholic Acid (2˚ BA) | | | | |
| &nbsp;&nbsp;&nbsp;&nbsp;Mean (SD) | 5.90e-04 (7.63e-04) | 6.70e-04 (1.04e-03) | 6.51e-03 (1.08e-02) | 3.00e-02 (3.40e-02) |
| &nbsp;&nbsp;&nbsp;&nbsp;Median (IQR) | 2.79e-04 (0.00e+00, 7.97e-04) | 1.59e-04 (0.00e+00, 1.02e-03) | 1.18e-03 (0.00e+00, 6.52e-03) | 1.23e-02 (7.41e-03, 3.20e-02) |
| &nbsp;&nbsp;&nbsp;&nbsp;Minimum-Maximum | 0e+00-2.55e-03 | 0e+00-5.12e-03 | 0e+00-4.28e-02 | 0e+00-1.07e-01 |
| Butyrate (SCFA) | | | | |
| &nbsp;&nbsp;&nbsp;&nbsp;Mean (SD) | 4.19e-01 (3.80e-01) | 2.36e+00 (3.22e+00) | 5.88e+00 (6.08e+00) | 1.53e+01 (1.01e+01) |
| &nbsp;&nbsp;&nbsp;&nbsp;Median (IQR) | 4.60e-01 (6.00e-02, 5.80e-01) | 8.30e-01 (3.05e-01, 3.25e+00) | 3.89e+00 (1.99e+00, 6.86e+00) | 1.24e+01 (8.87e+00, 2.34e+01) |
| &nbsp;&nbsp;&nbsp;&nbsp;Minimum-Maximum | 0.0e+00-1.28e+00 | 0.0e+00-1.22e+01 | 4.0e-02-2.66e+01 | 1.7e+00-3.85e+01 |
| Cholic Acid (1˚ BA) | | | | |
| &nbsp;&nbsp;&nbsp;&nbsp;Mean (SD) | 4.81e-01 (1.33e+00) | 8.09e-01 (1.80e+00) | 6.09e-01 (2.50e+00) | 2.08e-02 (4.12e-02) |
| &nbsp;&nbsp;&nbsp;&nbsp;Median (IQR) | 2.60e-02 (2.43e-03, 1.83e-01) | 1.23e-01 (2.13e-02, 7.06e-01) | 5.09e-03 (1.68e-03, 6.89e-02) | 5.41e-03 (2.14e-03, 1.21e-02) |
| &nbsp;&nbsp;&nbsp;&nbsp;Minimum-Maximum | 0.00e+00-7.49e+00 | 1.10e-03-9.18e+00 | 0.00e+00-1.27e+01 | 4.85e-04-1.67e-01 |
| Deoxycholic Acid (2˚ BA) | | | | |
| &nbsp;&nbsp;&nbsp;&nbsp;Mean (SD) | 6.61e-04 (7.86e-04) | 2.61e-02 (1.28e-01) | 2.52e-01 (3.31e-01) | 1.56e+00 (2.01e+00) |
| &nbsp;&nbsp;&nbsp;&nbsp;Median (IQR) | 3.53e-04 (3.82e-05, 1.13e-03) | 6.62e-04 (5.09e-05, 3.40e-03) | 1.01e-01 (4.12e-02, 2.63e-01) | 9.39e-01 (4.81e-01, 1.79e+00) |
| &nbsp;&nbsp;&nbsp;&nbsp;Minimum-Maximum | 0.00e+00-2.75e-03 | 0.00e+00-8.02e-01 | 3.33e-03-1.11e+00 | 1.01e-01-8.87e+00 |
| Glycocholic Acid (1˚Conj. BA) | | | | |
| &nbsp;&nbsp;&nbsp;&nbsp;Mean (SD) | 1.39e-01 (5.16e-01) | 1.76e-01 (6.96e-01) | 1.42e-03 (3.08e-03) | 4.67e-03 (9.85e-03) |
| &nbsp;&nbsp;&nbsp;&nbsp;Median (IQR) | 4.51e-03 (1.66e-04, 2.43e-02) | 2.13e-03 (8.40e-05, 5.12e-02) | 2.04e-04 (0.00e+00, 1.50e-03) | 5.80e-04 (8.38e-05, 2.90e-03) |
| &nbsp;&nbsp;&nbsp;&nbsp;Minimum-Maximum | 0e+00-2.93e+00 | 0e+00-4.31e+00 | 0e+00-1.45e-02 | 0e+00-4.17e-02 |
| Lithocholic Acid (2˚ BA) | | | | |
| &nbsp;&nbsp;&nbsp;&nbsp;Mean (SD) | 3.73e-04 (6.01e-04) | 1.96e-02 (7.89e-02) | 4.21e-01 (5.52e-01) | 9.03e-01 (7.03e-01) |
| &nbsp;&nbsp;&nbsp;&nbsp;Median (IQR) | 0.00e+00 (0.00e+00, 5.58e-04) | 6.37e-04 (1.99e-04, 1.94e-03) | 1.46e-01 (4.75e-02, 6.92e-01) | 6.88e-01 (4.32e-01, 1.14e+00) |
| &nbsp;&nbsp;&nbsp;&nbsp;Minimum-Maximum | 0.00e+00-1.75e-03 | 0.00e+00-4.41e-01 | 2.37e-03-2.24e+00 | 1.72e-01-2.86e+00 |
| Propionate (SCFA) | | | | |
| &nbsp;&nbsp;&nbsp;&nbsp;Mean (SD) | 6.58e-01 (1.02e+00) | 5.13e+00 (6.45e+00) | 1.07e+01 (8.24e+00) | 1.63e+01 (7.21e+00) |
| &nbsp;&nbsp;&nbsp;&nbsp;Median (IQR) | 4.35e-01 (1.38e-01, 6.20e-01) | 1.87e+00 (5.15e-01, 7.83e+00) | 9.50e+00 (3.95e+00, 1.70e+01) | 1.76e+01 (1.04e+01, 2.06e+01) |
| &nbsp;&nbsp;&nbsp;&nbsp;Minimum-Maximum | 0.00e+00-5.81e+00 | 0.00e+00-2.39e+01 | 8.00e-02-2.80e+01 | 2.96e+00-2.89e+01 |
| Taurocholic Acid (1˚Conj. BA) | | | | |
| &nbsp;&nbsp;&nbsp;&nbsp;Mean (SD) | 5.62e-02 (9.73e-02) | 1.10e-01 (2.91e-01) | 1.05e-03 (2.16e-03) | 9.58e-04 (1.60e-03) |
| &nbsp;&nbsp;&nbsp;&nbsp;Median (IQR) | 9.42e-03 (2.48e-04, 6.03e-02) | 1.32e-03 (1.45e-04, 2.19e-02) | 2.91e-05 (0.00e+00, 5.43e-04) | 2.33e-04 (0.00e+00, 1.24e-03) |
| &nbsp;&nbsp;&nbsp;&nbsp;Minimum-Maximum | 0e+00-4.01e-01 | 0e+00-1.60e+00 | 0e+00-9.71e-03 | 0e+00-6.73e-03 |
# Export the summary table as a PNG. The output path is passed as `filename`
# (spelled out in full) instead of relying on partial matching of `file`;
# vwidth/vheight are forwarded through `...`.
gt::gtsave(
  gtsummary::as_gt(metab_boxplot_summay_stats),
  filename = "./Results/Metab_Quant_Summary_Stats.png",
  vwidth = 1500,
  vheight = 1000
)
set.seed(123) # for consistent jittering of points

# Box plots of metabolite concentration (mM) per diversity group, one panel
# per compound, with jittered sample points and the pre-computed significance
# brackets from metab_boxplot_stats.
# NOTE(review): et()/eb()/er() are presumably shorthands for
# element_text()/element_blank()/element_rect() defined elsewhere - confirm.
gg_metab_boxplot <-
  ggboxplot(
    metab_boxplot,
    x = "diversity_group_abv",
    y = "mvalue__mM",
    fill = "db",
    color = "diversity_group_abv",
    alpha = 0.65,
    outlier.shape = NA, # individual points are drawn by geom_point() below
    facet.by = c("class", "compound")
  ) +
  theme(
    legend.text = et(size = 12, color = "black"),
    legend.title = et(size = 14, color = "black"),
    axis.title.x = eb(),
    axis.title.y = et(size = 12, color = "black"),
    panel.border = eb(),
    strip.background = er(colour = "white", fill = "white")
  ) +
  geom_hline(yintercept = 0) +
  geom_segment(aes(x = 0.35, y = 0, xend = 0.35, yend = Inf)) +
  # Replaces the class x compound faceting requested in ggboxplot() above
  facet_wrap(~compound, scales = "fixed") +
  stat_pvalue_manual(
    metab_boxplot_stats,
    tip.length = 0.015
  ) +
  geom_point(
    data = metab_boxplot,
    aes(
      x = diversity_group_abv,
      y = mvalue__mM,
      color = diversity_group_abv
    ),
    position = position_jitter(width = 0.2),
    size = 2,
    alpha = 0.65
  ) +
  scale_fill_manual("Cohort", values = rev(pirate_colors)) +
  scale_color_manual("Diversity Group", values = diversity_group_colors) +
  scale_y_log10(
    limits = c(0.01, 5000),
    labels = c(0.01, 0.1, 1, 10, 100, 1000),
    breaks = c(0.01, 0.1, 1, 10, 100, 1000),
    expand = expansion(mult = c(0.1, 0.2))
  ) +
  ylab("Concentration (mM)\n")
# Display the metabolite box plot. This print statement had been fused with
# the next assignment, which silently created
# `gg_metab_boxplotgg_ecoc_all_patients` and left `gg_ecoc_all_patients`
# undefined.
gg_metab_boxplot

# Stacked bar chart of per-sample taxonomic relative abundance (pctseqs from
# metaphlan_peri_anon) for samples with a bacterial infection, with samples
# ordered by decreasing Enterococcus relative abundance.
gg_ecoc_all_patients <- peri_matrix_all %>%
  distinct(sampleID, .keep_all = TRUE) %>%
  select(sampleID, patientID, bact_infection_present) %>%
  mutate(
    bact_infection_present = ifelse(
      bact_infection_present == "No", "No Infection", "Infection"
    ),
    bact_infection_present = factor(
      bact_infection_present,
      levels = c("Infection", "No Infection")
    )
  ) %>%
  # Summed relative abundance of all Enterococcus species per sample
  left_join(
    metaphlan_peri_anon %>%
      select(-bact_infection_present) %>%
      group_by(patientID, sampleID) %>%
      filter(grepl(x = Species, pattern = "Enterococcus", ignore.case = TRUE)) %>%
      count(sampleID, wt = pctseqs, name = "enterococcus_rel_abundance")
  ) %>%
  # Taxonomy and abundance columns for every sample
  left_join(
    metaphlan_peri_anon %>%
      select(patientID, sampleID, Kingdom:pctseqs)
  ) %>%
  ungroup() %>%
  arrange(Kingdom, Phylum, Class, Order, Family, Genus) %>%
  # Genus label is made unique by prefixing its phylum, order and family
  mutate(Genus = paste0(Phylum, "-", Order, "-", Family, "-", Genus)) %>%
  group_by(sampleID) %>%
  arrange(Genus) %>%
  mutate(
    cum.pct = cumsum(pctseqs),
    # midpoint of each stacked segment
    y.text = (cum.pct + c(0, cum.pct[-length(cum.pct)])) / 2
  ) %>%
  ungroup() %>%
  dplyr::select(-cum.pct) %>%
  mutate(
    sampleID = ifelse(is.na(sampleID), patientID, sampleID),
    Genus = factor(Genus, levels = unique(Genus)),
    # Enterococcus is moved to the end of the factor levels
    Genus = fct_relevel(
      Genus,
      "Firmicutes-Lactobacillales-Enterococcaceae-Enterococcus",
      after = Inf
    )
  ) %>%
  arrange(-enterococcus_rel_abundance) %>%
  filter(bact_infection_present != "No Infection") %>%
  ggplot(aes(
    x = reorder(sampleID, -enterococcus_rel_abundance),
    y = pctseqs
  )) +
  geom_bar(aes(fill = Genus), stat = "identity") +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = et(angle = 90, hjust = 0.5),
    strip.text.x = eb(),
    strip.background = eb(),
    axis.title.y = et(color = "black", size = 12),
    axis.text.y = et(color = "black", size = 10)
  ) +
  scale_fill_manual(values = metaphlan_pal2) +
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.005))) +
  ylab("Shotgun Relative Abundance (%)\n") +
  xlab("") +
  facet_grid(~bact_infection_present, space = "free_x", scales = "free_x")
# gg_ecoc_all_patients
# Discrete heatmap of infections
# Long table with one row per sample and cultured organism, annotated with
# the sample's Enterococcus relative abundance. Organisms outside the
# Enterococcus / Klebsiella / Escherichia / Proteus / Citrobacter / culture
# negative set are collapsed to "Other Bacterial Infection".
# `T` was replaced by `TRUE` throughout (`T` is an ordinary, reassignable
# variable).
ecoc_infx_orgs <- peri_criteria_all %>%
  filter(sampleID %in% peri_matrix_all$sampleID) %>%
  # Keep, per patient and eday, the row with the highest infx_stool
  group_by(patientID, eday) %>%
  arrange(-infx_stool) %>%
  dplyr::slice(1) %>%
  ungroup() %>%
  select(
    patientID, sampleID, bact_infection_present, infx_stool,
    organism1, micro1.factor
  ) %>%
  distinct() %>%
  # Normalise organism names (no whitespace, lower case); anything that does
  # not match the recognised list is relabelled "Culture Negative"
  mutate(
    organism1 = gsub(x = organism1, pattern = "\\s+", replacement = ""),
    organism1 = str_to_lower(string = organism1),
    organism1 = ifelse(
      grepl(x = organism1, pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"),
      organism1,
      "Culture Negative"
    )
  ) %>%
  group_by(patientID, sampleID, infx_stool) %>%
  # One logical indicator column per organism.
  # NOTE(review): the patterns "enterobactercloaceae" and
  # "stenotrophmonasmaltophilia" (and the column name
  # `Staphylococcus epidermis`) differ from the usual spellings
  # "cloacae" / "Stenotrophomonas" / "epidermidis" - confirm they match the
  # spelling used in organism1, otherwise those rows match no indicator.
  mutate(
    `Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium", ignore.case = TRUE),
    `Enterococcus faecalis` = grepl(x = organism1, pattern = "enterococcusfaecalis", ignore.case = TRUE),
    `Enterococcus avium` = grepl(x = organism1, pattern = "enterococcusavium", ignore.case = TRUE),
    `Enterococcus gallinarum` = grepl(x = organism1, pattern = "enterococcusgallinarum", ignore.case = TRUE),
    `Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae", ignore.case = TRUE),
    `Enterobacter cloaceae` = grepl(x = organism1, pattern = "enterobactercloaceae", ignore.case = TRUE),
    `Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli", ignore.case = TRUE),
    `Citrobacter freundii` = grepl(x = organism1, pattern = "citrobacterfreundii", ignore.case = TRUE),
    `Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis", ignore.case = TRUE),
    `Staphylococcus aureus` = grepl(x = organism1, pattern = "staphylococcusaureus", ignore.case = TRUE),
    `Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis", ignore.case = TRUE),
    `Pseudomonas aeruginosa` = grepl(x = organism1, pattern = "pseudomonasaeruginosa", ignore.case = TRUE),
    `Stenotrophmonas maltophilia` = grepl(x = organism1, pattern = "stenotrophmonasmaltophilia", ignore.case = TRUE),
    `Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori", ignore.case = TRUE),
    `Clostridium difficile` = grepl(x = organism1, pattern = "clostridiumdifficile|clostridioidesdifficile", ignore.case = TRUE),
    `Bacteroides sp.` = grepl(x = organism1, pattern = "bacteroides", ignore.case = TRUE),
    `Culture Negative` = grepl(x = organism1, pattern = "Culture Negative", ignore.case = TRUE)
  ) %>%
  # Indicator columns -> long format (organisms / org_presence)
  pivot_longer(
    -c(patientID:micro1.factor),
    names_to = "organisms",
    values_to = "org_presence"
  ) %>%
  mutate(
    organisms = ifelse(
      bact_infection_present == "No", "No Bacterial Infection", organisms
    ),
    org_presence = ifelse(org_presence == TRUE, 1, 0)
  ) %>%
  # Keep only organisms that were actually detected
  group_by(sampleID, infx_stool, bact_infection_present, organisms) %>%
  dplyr::slice_max(org_presence) %>%
  ungroup() %>%
  filter(org_presence == 1) %>%
  # Summed relative abundance of all Enterococcus species per sample
  left_join(
    metaphlan_peri_anon %>%
      select(-bact_infection_present) %>%
      group_by(patientID, sampleID) %>%
      filter(grepl(x = Species, pattern = "Enterococcus", ignore.case = TRUE)) %>%
      count(sampleID, wt = pctseqs, name = "enterococcus_rel_abundance")
  ) %>%
  left_join(
    metaphlan_peri_anon %>%
      select(patientID, sampleID, Kingdom:pctseqs)
  ) %>%
  # Collapse the joined taxonomy rows back to one row per sample/organism
  group_by(patientID, sampleID, organisms, org_presence) %>%
  dplyr::slice(1) %>%
  mutate(sampleID = ifelse(is.na(sampleID), patientID, sampleID)) %>%
  mutate(
    org_presence = ifelse(
      grepl(pattern = "No", x = bact_infection_present, ignore.case = TRUE),
      0,
      org_presence
    )
  ) %>%
  ungroup() %>%
  mutate(
    organisms = ifelse(
      bact_infection_present == "Yes" & org_presence == 0, "Other", organisms
    )
  ) %>%
  arrange(-enterococcus_rel_abundance) %>%
  mutate(
    organisms = ifelse(
      grepl(
        x = organisms,
        pattern = "enterococcus|klebsiella|escherichia|proteus|citrobacter|culture",
        ignore.case = TRUE
      ),
      organisms,
      "Other Bacterial Infection"
    ),
    organisms = ifelse(
      bact_infection_present == "No", "No Bacterial Infection", organisms
    )
  )
# Plot-ready version of ecoc_infx_orgs: one row per sample/patient/organism,
# an integer colour code per organism (org_colors), ordered factor levels
# (Enterococcus species first), restricted to samples with an infection.
# `T` was replaced by `TRUE` throughout.
ecoc_infx_orgs_order <- ecoc_infx_orgs %>%
  group_by(sampleID, patientID, organisms) %>%
  distinct(sampleID, patientID, .keep_all = TRUE) %>%
  ungroup() %>%
  mutate(
    # E. gallinarum is folded into the "other" category
    organisms = ifelse(
      grepl(organisms, pattern = "gallinarum"),
      "Other Bacterial Infection",
      organisms
    ),
    # Colour codes: 1-3 Enterococcus spp., 4 Klebsiella, 5 E. coli,
    # 6 Proteus, 7 Citrobacter, 8 other, 9 culture negative, 0 anything else
    org_colors = case_when(
      grepl(x = organisms, pattern = "enterococcus faecium", ignore.case = TRUE) ~ 1,
      grepl(x = organisms, pattern = "enterococcus faecalis", ignore.case = TRUE) ~ 2,
      grepl(x = organisms, pattern = "enterococcus avium", ignore.case = TRUE) ~ 3,
      grepl(x = organisms, pattern = "klebsiella pneumoniae", ignore.case = TRUE) ~ 4,
      grepl(x = organisms, pattern = "escherichia coli", ignore.case = TRUE) ~ 5,
      grepl(x = organisms, pattern = "proteus mirabilis", ignore.case = TRUE) ~ 6,
      grepl(x = organisms, pattern = "citrobacter freundii", ignore.case = TRUE) ~ 7,
      grepl(x = organisms, pattern = "other bacterial infection|gallinarum", ignore.case = TRUE) ~ 8,
      grepl(x = organisms, pattern = "culture negative", ignore.case = TRUE) ~ 9,
      TRUE ~ 0
    ),
    organisms = as.factor(organisms),
    organisms = factor(
      organisms,
      levels = c(
        "Enterococcus faecium", "Enterococcus faecalis", "Enterococcus avium",
        "Klebsiella pneumoniae", "Escherichia coli", "Proteus mirabilis",
        "Citrobacter freundii", "Other Bacterial Infection",
        "Culture Negative", "No Bacterial Infection"
      )
    ),
    org_colors = factor(
      org_colors,
      levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "0")
    )
  ) %>%
  left_join(ecoc_infx_orgs) %>%
  # Factor levels are re-applied after the join
  mutate(
    organisms = factor(
      organisms,
      levels = c(
        "Enterococcus faecium", "Enterococcus faecalis", "Enterococcus avium",
        "Klebsiella pneumoniae", "Escherichia coli", "Proteus mirabilis",
        "Citrobacter freundii", "Other Bacterial Infection",
        "Culture Negative", "No Bacterial Infection"
      )
    ),
    org_colors = factor(
      org_colors,
      levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "0")
    )
  ) %>%
  drop_na(organisms) %>%
  mutate(
    bact_infection_present = ifelse(
      bact_infection_present == "No", "No Infection", "Infection"
    ),
    bact_infection_present = factor(
      bact_infection_present,
      levels = c("Infection", "No Infection")
    )
  ) %>%
  ungroup() %>%
  filter(bact_infection_present == "Infection") %>%
  # Remove factor levels that no longer occur after filtering
  droplevels()
# Tile plot of the infecting organism(s) per sample, with samples ordered by
# decreasing Enterococcus relative abundance.
# Fill colours and legend labels are keyed by the org_colors code rather than
# by position. The positional label vector listed "Citrobacter freundii"
# before "Proteus mirabilis", although code 6 is Proteus and code 7 is
# Citrobacter, so those two legend entries were swapped. Keying by code also
# keeps colours and labels stable when droplevels() removes unused codes.
gg_ecoc_infx_orgs <- ecoc_infx_orgs_order %>%
  ggplot(., aes(
    x = reorder(sampleID, -enterococcus_rel_abundance),
    y = organisms,
    fill = as.factor(org_colors)
  )) +
  geom_tile(color = "black") +
  theme_bw() +
  theme(
    axis.title.y = et(color = "black", size = 12),
    axis.text.y = et(color = "black", size = 10),
    axis.ticks.x = eb(),
    axis.text.x = eb(),
    axis.title.x = eb(),
    panel.grid = eb(),
    strip.text = eb()
  ) +
  scale_fill_manual(
    values = c(
      "1" = "#129246", "2" = "#0C7A3A", "3" = "#08592B",
      "4" = "#FF0000", "5" = "#CC0404", "6" = "#8A0202", "7" = "#5C0202",
      "8" = "#E6C66E", "9" = "#BD992D", "0" = "#00000000"
    ),
    # Look each break (an org_colors code) up in the code -> name table
    labels = function(codes) {
      unname(c(
        "1" = "Enterococcus faecium",
        "2" = "Enterococcus faecalis",
        "3" = "Enterococcus avium",
        "4" = "Klebsiella pneumoniae",
        "5" = "Escherichia coli",
        "6" = "Proteus mirabilis",
        "7" = "Citrobacter freundii",
        "8" = "Other Bacterial Infection",
        "9" = "Culture Negative",
        "0" = "No Bacterial Infection"
      )[as.character(codes)])
    }
  ) +
  labs(fill = "Infecting Organism", y = "Infecting Organism\n") +
  scale_y_discrete(
    expand = expansion(mult = c(0.005, 0.005)),
    # reversed so the first organism level is drawn at the top
    limits = rev(levels(ecoc_infx_orgs_order$organisms))
  ) +
  facet_grid(. ~ bact_infection_present, space = "free_x", scales = "free_x")
# gg_ecoc_infx_orgs
# Show the abundance bars stacked above the infection tiles.
# The closing parenthesis of this call was fused with the following pdf()
# call (a parse error); the two statements are separated again.
yingtools2::gg.stack(
  gg_ecoc_all_patients,
  gg_ecoc_infx_orgs,
  heights = c(1, 0.5),
  adjust.themes = TRUE
)

# Write the same stacked figure to PDF
pdf(
  file = "./Results/Figure_5A.pdf",
  height = 8,
  width = 10,
  onefile = FALSE
)
yingtools2::gg.stack(
  gg_ecoc_all_patients,
  gg_ecoc_infx_orgs,
  heights = c(1, 0.5),
  adjust.themes = TRUE
)
invisible(dev.off())
# Save object for combining later
# Same stack as Figure 5A, returned as a gtable with uniform margins and
# without the tile-plot legend.
fig_5a <- yingtools2::gg.stack(
  gg_ecoc_all_patients +
    theme(plot.margin = margin(t = 5, r = 5, b = 5, l = 5)),
  gg_ecoc_infx_orgs +
    theme(
      legend.position = "none",
      plot.margin = margin(t = 5, r = 5, b = 5, l = 5)
    ),
  heights = c(1, 0.5),
  adjust.themes = TRUE,
  as.gtable = TRUE
)

# Stacked bar chart of per-sample taxonomic relative abundance for samples
# with a bacterial infection, ordered by the decreasing summed abundance of
# K. pneumoniae, E. coli, C. freundii and P. mirabilis.
# This assignment was fused onto the end of the fig_5a call above (a parse
# error) and is separated again; `T` was replaced by `TRUE`.
gg_ebac_all_patients <- peri_matrix_all %>%
  distinct(sampleID, .keep_all = TRUE) %>%
  select(sampleID, patientID, bact_infection_present) %>%
  mutate(
    bact_infection_present = ifelse(
      bact_infection_present == "No", "No Infection", "Infection"
    ),
    bact_infection_present = factor(
      bact_infection_present,
      levels = c("Infection", "No Infection")
    )
  ) %>%
  # Summed relative abundance of the four Enterobacterales species per sample
  left_join(
    metaphlan_peri_anon %>%
      select(-bact_infection_present) %>%
      group_by(patientID, sampleID) %>%
      filter(grepl(x = Species, pattern = "Klebsiella pneumoniae|Escherichia coli|Citrobacter freundii|Proteus mirabilis")) %>%
      count(sampleID, wt = pctseqs, name = "enterobacterales_rel_abundance")
  ) %>%
  # Taxonomy and abundance columns for every sample
  left_join(
    metaphlan_peri_anon %>%
      select(patientID, sampleID, Kingdom:pctseqs)
  ) %>%
  ungroup() %>%
  arrange(Kingdom, Phylum, Class, Order, Family, Genus) %>%
  # Genus label is made unique by prefixing its phylum, order and family
  mutate(Genus = paste0(Phylum, "-", Order, "-", Family, "-", Genus)) %>%
  group_by(sampleID) %>%
  arrange(Genus) %>%
  mutate(
    cum.pct = cumsum(pctseqs),
    # midpoint of each stacked segment
    y.text = (cum.pct + c(0, cum.pct[-length(cum.pct)])) / 2
  ) %>%
  ungroup() %>%
  dplyr::select(-cum.pct) %>%
  mutate(
    sampleID = ifelse(is.na(sampleID), patientID, sampleID),
    Genus = factor(Genus, levels = unique(Genus)),
    # The Enterobacterales genera are moved to the end of the factor levels
    Genus = fct_relevel(
      Genus,
      c(
        "Proteobacteria-Enterobacterales-Enterobacteriaceae-Citrobacter",
        "Proteobacteria-Enterobacterales-Enterobacteriaceae-Enterobacter",
        "Proteobacteria-Enterobacterales-Enterobacteriaceae-Escherichia",
        "Proteobacteria-Enterobacterales-Enterobacteriaceae-Klebsiella",
        "Proteobacteria-Enterobacterales-Morganellaceae-Proteus"
      ),
      after = Inf
    )
  ) %>%
  arrange(-enterobacterales_rel_abundance) %>%
  filter(bact_infection_present == "Infection") %>%
  ggplot(aes(
    x = reorder(sampleID, -enterobacterales_rel_abundance),
    y = pctseqs
  )) +
  geom_bar(aes(fill = Genus), stat = "identity") +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = et(angle = 90, hjust = 0.5),
    strip.text.x = eb(),
    strip.background = eb(),
    axis.title.y = et(color = "black", size = 12),
    axis.text.y = et(color = "black", size = 10)
  ) +
  scale_fill_manual(values = metaphlan_pal2) +
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.005))) +
  ylab("Shotgun Relative Abundance (%)\n") +
  xlab("") +
  facet_grid(~bact_infection_present, space = "free_x", scales = "free_x")
# gg_ebac_all_patients
# Discrete heatmap of infections
# Long table with one row per sample and cultured organism, annotated with
# the sample's summed relative abundance of K. pneumoniae, E. coli,
# C. freundii and P. mirabilis. Organisms outside the Enterococcus /
# Klebsiella / Escherichia / Proteus / Citrobacter / culture negative set are
# collapsed to "Other Bacterial Infection".
# `T` was replaced by `TRUE` throughout (`T` is an ordinary, reassignable
# variable).
ebac_infx_orgs <- peri_criteria_all %>%
  filter(sampleID %in% peri_matrix_all$sampleID) %>%
  # Keep, per patient and eday, the row with the highest infx_stool
  group_by(patientID, eday) %>%
  arrange(-infx_stool) %>%
  dplyr::slice(1) %>%
  ungroup() %>%
  select(
    patientID, sampleID, bact_infection_present, infx_stool,
    organism1, micro1.factor
  ) %>%
  distinct() %>%
  # Normalise organism names (no whitespace, lower case); anything that does
  # not match the recognised list is relabelled "Culture Negative"
  mutate(
    organism1 = gsub(x = organism1, pattern = "\\s+", replacement = ""),
    organism1 = str_to_lower(string = organism1),
    organism1 = ifelse(
      grepl(x = organism1, pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"),
      organism1,
      "Culture Negative"
    )
  ) %>%
  group_by(patientID, sampleID, infx_stool) %>%
  # One logical indicator column per organism.
  # NOTE(review): the patterns "enterobactercloaceae" and
  # "stenotrophmonasmaltophilia" (and the column name
  # `Staphylococcus epidermis`) differ from the usual spellings
  # "cloacae" / "Stenotrophomonas" / "epidermidis" - confirm they match the
  # spelling used in organism1, otherwise those rows match no indicator.
  mutate(
    `Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium", ignore.case = TRUE),
    `Enterococcus faecalis` = grepl(x = organism1, pattern = "enterococcusfaecalis", ignore.case = TRUE),
    `Enterococcus avium` = grepl(x = organism1, pattern = "enterococcusavium", ignore.case = TRUE),
    `Enterococcus gallinarum` = grepl(x = organism1, pattern = "enterococcusgallinarum", ignore.case = TRUE),
    `Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae", ignore.case = TRUE),
    `Enterobacter cloaceae` = grepl(x = organism1, pattern = "enterobactercloaceae", ignore.case = TRUE),
    `Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli", ignore.case = TRUE),
    `Citrobacter freundii` = grepl(x = organism1, pattern = "citrobacterfreundii", ignore.case = TRUE),
    `Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis", ignore.case = TRUE),
    `Staphylococcus aureus` = grepl(x = organism1, pattern = "staphylococcusaureus", ignore.case = TRUE),
    `Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis", ignore.case = TRUE),
    `Pseudomonas aeruginosa` = grepl(x = organism1, pattern = "pseudomonasaeruginosa", ignore.case = TRUE),
    `Stenotrophmonas maltophilia` = grepl(x = organism1, pattern = "stenotrophmonasmaltophilia", ignore.case = TRUE),
    `Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori", ignore.case = TRUE),
    `Clostridium difficile` = grepl(x = organism1, pattern = "clostridiumdifficile|clostridioidesdifficile", ignore.case = TRUE),
    `Bacteroides sp.` = grepl(x = organism1, pattern = "bacteroides", ignore.case = TRUE),
    `Culture Negative` = grepl(x = organism1, pattern = "Culture Negative", ignore.case = TRUE)
  ) %>%
  # Indicator columns -> long format (organisms / org_presence)
  pivot_longer(
    -c(patientID:micro1.factor),
    names_to = "organisms",
    values_to = "org_presence"
  ) %>%
  mutate(
    organisms = ifelse(
      bact_infection_present == "No", "No Bacterial Infection", organisms
    ),
    org_presence = ifelse(org_presence == TRUE, 1, 0)
  ) %>%
  # Keep only organisms that were actually detected
  group_by(sampleID, infx_stool, bact_infection_present, organisms) %>%
  dplyr::slice_max(org_presence) %>%
  ungroup() %>%
  filter(org_presence == 1) %>%
  # Summed relative abundance of the four Enterobacterales species per sample
  left_join(
    metaphlan_peri_anon %>%
      select(-bact_infection_present) %>%
      group_by(patientID, sampleID) %>%
      filter(grepl(x = Species, pattern = "Klebsiella pneumoniae|Escherichia coli|Citrobacter freundii|Proteus mirabilis")) %>%
      count(sampleID, wt = pctseqs, name = "enterobacterales_rel_abundance")
  ) %>%
  left_join(
    metaphlan_peri_anon %>%
      select(patientID, sampleID, Kingdom:pctseqs)
  ) %>%
  # Collapse the joined taxonomy rows back to one row per sample/organism
  group_by(patientID, sampleID, organisms, org_presence) %>%
  dplyr::slice(1) %>%
  mutate(sampleID = ifelse(is.na(sampleID), patientID, sampleID)) %>%
  mutate(
    org_presence = ifelse(
      grepl(pattern = "No", x = bact_infection_present, ignore.case = TRUE),
      0,
      org_presence
    )
  ) %>%
  ungroup() %>%
  mutate(
    organisms = ifelse(
      bact_infection_present == "Yes" & org_presence == 0, "Other", organisms
    )
  ) %>%
  arrange(-enterobacterales_rel_abundance) %>%
  mutate(
    organisms = ifelse(
      grepl(
        x = organisms,
        pattern = "enterococcus|klebsiella|escherichia|proteus|citrobacter|culture",
        ignore.case = TRUE
      ),
      organisms,
      "Other Bacterial Infection"
    ),
    organisms = ifelse(
      bact_infection_present == "No", "No Bacterial Infection", organisms
    )
  )
# Plot-ready version of ebac_infx_orgs: one row per sample/patient/organism,
# an integer colour code per organism (org_colors), ordered factor levels
# (Enterobacterales species first), restricted to samples with an infection.
# `T` was replaced by `TRUE` throughout.
ebac_infx_orgs_order <- ebac_infx_orgs %>%
  group_by(sampleID, patientID, organisms) %>%
  distinct(sampleID, patientID, .keep_all = TRUE) %>%
  ungroup() %>%
  mutate(
    # E. gallinarum is folded into the "other" category
    organisms = ifelse(
      grepl(organisms, pattern = "gallinarum"),
      "Other Bacterial Infection",
      organisms
    ),
    # Colour codes: 1-3 Enterococcus spp., 4 Klebsiella, 5 E. coli,
    # 6 Proteus, 7 Citrobacter, 8 other, 9 culture negative, 0 anything else
    org_colors = case_when(
      grepl(x = organisms, pattern = "enterococcus faecium", ignore.case = TRUE) ~ 1,
      grepl(x = organisms, pattern = "enterococcus faecalis", ignore.case = TRUE) ~ 2,
      grepl(x = organisms, pattern = "enterococcus avium", ignore.case = TRUE) ~ 3,
      grepl(x = organisms, pattern = "klebsiella pneumoniae", ignore.case = TRUE) ~ 4,
      grepl(x = organisms, pattern = "escherichia coli", ignore.case = TRUE) ~ 5,
      grepl(x = organisms, pattern = "proteus mirabilis", ignore.case = TRUE) ~ 6,
      grepl(x = organisms, pattern = "citrobacter freundii", ignore.case = TRUE) ~ 7,
      grepl(x = organisms, pattern = "other bacterial infection|gallinarum", ignore.case = TRUE) ~ 8,
      grepl(x = organisms, pattern = "culture negative", ignore.case = TRUE) ~ 9,
      TRUE ~ 0
    ),
    organisms = as.factor(organisms),
    organisms = factor(
      organisms,
      levels = c(
        "Klebsiella pneumoniae", "Escherichia coli", "Proteus mirabilis",
        "Citrobacter freundii", "Enterococcus faecium",
        "Enterococcus faecalis", "Enterococcus avium",
        "Other Bacterial Infection", "Culture Negative",
        "No Bacterial Infection"
      )
    ),
    org_colors = factor(
      org_colors,
      levels = c("4", "5", "6", "7", "1", "2", "3", "8", "9", "0")
    )
  ) %>%
  left_join(ebac_infx_orgs) %>%
  # Factor levels are re-applied after the join
  mutate(
    organisms = factor(
      organisms,
      levels = c(
        "Klebsiella pneumoniae", "Escherichia coli", "Proteus mirabilis",
        "Citrobacter freundii", "Enterococcus faecium",
        "Enterococcus faecalis", "Enterococcus avium",
        "Other Bacterial Infection", "Culture Negative",
        "No Bacterial Infection"
      )
    ),
    org_colors = factor(
      org_colors,
      levels = c("4", "5", "6", "7", "1", "2", "3", "8", "9", "0")
    )
  ) %>%
  drop_na(organisms) %>%
  mutate(
    bact_infection_present = ifelse(
      bact_infection_present == "No", "No Infection", "Infection"
    ),
    bact_infection_present = factor(
      bact_infection_present,
      levels = c("Infection", "No Infection")
    )
  ) %>%
  ungroup() %>%
  filter(bact_infection_present == "Infection") %>%
  # Remove factor levels that no longer occur after filtering
  droplevels()
# Tile plot of the infecting organism(s) per sample, with samples ordered by
# decreasing Enterobacterales relative abundance.
# Fill colours and legend labels are keyed by the org_colors code rather than
# by position. The positional label vector listed "Citrobacter freundii"
# before "Proteus mirabilis", although code 6 is Proteus and code 7 is
# Citrobacter, so those two legend entries were swapped. Keying by code also
# keeps colours and labels stable when droplevels() removes unused codes.
gg_ebac_infx_orgs <- ebac_infx_orgs_order %>%
  ggplot(., aes(
    x = reorder(sampleID, -enterobacterales_rel_abundance),
    y = organisms,
    fill = as.factor(org_colors)
  )) +
  geom_tile(color = "black") +
  theme_bw() +
  theme(
    axis.title.y = et(color = "black", size = 12),
    axis.text.y = et(color = "black", size = 10),
    axis.ticks.x = eb(),
    axis.text.x = eb(),
    axis.title.x = eb(),
    panel.grid = eb(),
    strip.text = eb()
  ) +
  scale_fill_manual(
    values = c(
      "4" = "#FF0000", "5" = "#CC0404", "6" = "#8A0202", "7" = "#5C0202",
      "1" = "#129246", "2" = "#0C7A3A", "3" = "#08592B",
      "8" = "#E6C66E", "9" = "#BD992D", "0" = "#00000000"
    ),
    # Look each break (an org_colors code) up in the code -> name table
    labels = function(codes) {
      unname(c(
        "4" = "Klebsiella pneumoniae",
        "5" = "Escherichia coli",
        "6" = "Proteus mirabilis",
        "7" = "Citrobacter freundii",
        "1" = "Enterococcus faecium",
        "2" = "Enterococcus faecalis",
        "3" = "Enterococcus avium",
        "8" = "Other Bacterial Infection",
        "9" = "Culture Negative",
        "0" = "No Bacterial Infection"
      )[as.character(codes)])
    }
  ) +
  labs(fill = "Infecting Organism", y = "Infecting Organism\n") +
  scale_y_discrete(
    expand = expansion(mult = c(0.005, 0.005)),
    # reversed so the first organism level is drawn at the top
    limits = rev(levels(ebac_infx_orgs_order$organisms))
  ) +
  facet_grid(. ~ bact_infection_present, space = "free_x", scales = "free_x")
# gg_ebac_infx_orgs
# Show the abundance bars stacked above the infection tiles.
# The closing parenthesis of this call was fused with the following pdf()
# call (a parse error); the two statements are separated again.
yingtools2::gg.stack(
  gg_ebac_all_patients,
  gg_ebac_infx_orgs,
  heights = c(1, 0.5),
  adjust.themes = TRUE
)

# Write the same stacked figure to PDF
pdf(
  file = "./Results/Figure_5C.pdf",
  height = 8,
  width = 10,
  onefile = FALSE
)
yingtools2::gg.stack(
  gg_ebac_all_patients,
  gg_ebac_infx_orgs,
  heights = c(1, 0.5),
  adjust.themes = TRUE
)
invisible(dev.off())
# Save object for combining later
# Same stack as Figure 5C, returned as a gtable with uniform margins and
# without the tile-plot legend.
fig_5c <- yingtools2::gg.stack(
  gg_ebac_all_patients +
    theme(plot.margin = margin(t = 5, r = 5, b = 5, l = 5)),
  gg_ebac_infx_orgs +
    theme(
      legend.position = "none",
      plot.margin = margin(t = 5, r = 5, b = 5, l = 5)
    ),
  heights = c(1, 0.5),
  adjust.themes = TRUE,
  as.gtable = TRUE
)

# Combine panels A-D into a 2-column grid and write it to PDF.
# The pdf() call was fused onto the end of the fig_5c call above (a parse
# error) and is separated again. fig_5b and fig_5d are created elsewhere in
# the file.
pdf(
  file = "./Results/Figure_5ABCD.pdf",
  height = 16,
  width = 16,
  onefile = FALSE
)
cowplot::plot_grid(
  fig_5a,
  fig_5b + theme(plot.margin = margin(t = 5, r = 5, b = 25, l = 5)),
  fig_5c,
  fig_5d + theme(plot.margin = margin(t = 5, r = 5, b = 25, l = 5)),
  align = "h",
  axis = "bl",
  ncol = 2,
  rel_widths = c(1, 0.9)
)
invisible(dev.off())

# Qual compounds that we can quantify and that are also
# significant in the volcano analysis for both
# Compounds passing both thresholds: adjusted p-value <= 0.05 and absolute
# log2 fold change > 0.75. Row names of qual_tot_ecoc_expan hold the compound.
signif_compounds <-
  qual_tot_ecoc_expan %>%
  rownames_to_column(var = "compound") %>%
  filter(
    p.adj <= 0.05,
    abs(log2fc_val) > 0.75
  ) %>%
  distinct(compound)
# Long-format table of quantified metabolite values (mvalue__mM) for the
# significant compounds, one row per sample and compound, annotated with a
# metabolite class, a display label and the Enterococcus expansion status.
# The result is still grouped by `compound` when the pipeline ends.
metab_boxplot_ecoc_expan <- peri_matrix_all %>%
select(sampleID, enterococcus_rel_abundance) %>%
left_join(sample_lookup) %>%
# Keep only the first row for each sampleID.
group_by(sampleID) %>%
slice(1) %>%
# Attach the quantified metabolites, restricted to the significant compounds.
left_join(metab_quant_converted %>%
filter(compound %in% signif_compounds$compound)) %>%
# `%!in%` is not a base R operator; presumably defined earlier in this file.
# NOTE(review): this exclusion runs before str_to_title(), so it only matches
# compounds that are already spelled exactly like this - confirm.
filter(compound %!in% c("Kynurenine", "Anthranilic Acid")) %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate",
compound), compound = str_to_title(compound)) %>%
ungroup() %>%
# Any db value other than "HealthyDonor" is labelled "Liver Transplant".
mutate(db = ifelse(db == "HealthyDonor", "Healthy Donor",
"Liver Transplant") %>%
factor(., levels = c("Healthy Donor", "Liver Transplant")),
# Assign each compound to a metabolite class; anything not listed falls
# through to "Indole".
class = case_when(compound %in% c("Taurocholic Acid",
"Glycocholic Acid") ~ "Conjugated Primary Bile Acid",
compound %in% c("Cholic Acid") ~ "Primary Bile Acid",
compound %in% c("3-Oxolithocholic Acid", "Alloisolithocholic Acid",
"Deoxycholic Acid", "Isodeoxycholic Acid", "Lithocholic Acid") ~
"Secondary Bile Acid", compound %in% c("Threonine",
"Glycine", "Tyrosine", "Tyramine", "Serine",
"Leucine", "Isoleucine", "Valine", "Phenylalanine",
"Alanine", "Proline", "Aspartate", "Methionine",
"Glutamate", "Lysine", "Cysteine", "Tryptophan") ~
"Amino Acid", compound %in% c("Acetate", "Butyrate",
"Succinate", "Propionate") ~ "Short-Chain Fatty Acid",
compound %in% c("Kynurenic Acid", "Anthranilic Acid",
"Kynurenine", "Tryptamine") ~ "Kynurenine Metabolite",
compound == "Desaminotyrosine" ~ "Phenolic Aromatic",
compound == "Niacin" ~ "B-Vitamin", TRUE ~ "Indole")) %>%
# drop_na() without arguments removes rows with an NA in ANY column.
drop_na() %>%
group_by(compound) %>%
# Append a class abbreviation to every compound name, then convert the label
# to a factor with a fixed display order.
# NOTE(review): the "Ëš" sequences look like a mis-encoded ring/degree symbol.
# The labels and the factor levels below use the same bytes, so they still
# match each other - confirm the file encoding before changing either.
# NOTE(review): the Indole label is built with paste0(), so there is no space
# before "(Indole)", unlike the other classes.
# NOTE(review): the levels "Desaminotyrosine", "Niacin", "Tyrosine",
# "Tryptamine", "Tryptophan" and "Phenylalanine" lack the class suffix that
# case_when() appends, so those labels (and every other label not listed)
# become NA in factor() and are removed by the drop_na() that follows.
# Confirm that dropping them is intended.
mutate(class = factor(class, levels = c("Conjugated Primary Bile Acid",
"Primary Bile Acid", "Secondary Bile Acid", "Short-Chain Fatty Acid",
"Amino Acid", "Phenolic Aromatic", "Indole", "Kynurenine Metabolite",
"B-Vitamin")), compound = case_when(class == "Conjugated Primary Bile Acid" ~
paste(compound, "(1ËšConj. BA)"), class == "Primary Bile Acid" ~
paste(compound, "(1Ëš BA)"), class == "Secondary Bile Acid" ~
paste(compound, "(2Ëš BA)"), class == "Short-Chain Fatty Acid" ~
paste(compound, "(SCFA)"), class == "Amino Acid" ~ paste(compound,
"(AA)"), class == "Phenolic Aromatic" ~ paste(compound,
"(Phen. Arom.)"), class == "Indole" ~ paste0(compound,
"(Indole)"), class == "Kynurenine Metabolite" ~ paste(compound,
"(Kyn. Metab.)"), class == "B-Vitamin" ~ paste(compound,
"(B-Vitamin)")), compound = factor(compound, levels = c("Acetate (SCFA)",
"Butyrate (SCFA)", "Propionate (SCFA)", "Succinate (SCFA)",
"Taurocholic Acid (1ËšConj. BA)", "Glycocholic Acid (1ËšConj. BA)",
"Cholic Acid (1Ëš BA)", "3-Oxolithocholic Acid (2Ëš BA)",
"Alloisolithocholic Acid (2Ëš BA)", "Deoxycholic Acid (2Ëš BA)",
"Isodeoxycholic Acid (2Ëš BA)", "Lithocholic Acid (2Ëš BA)",
"Kynurenic Acid (Kyn. Metab.)", "Kynurenine (Kyn. Metab.)",
"Anthranilic Acid (Kyn. Metab.)", "Desaminotyrosine",
"Niacin", "Tyrosine", "Tryptamine", "Tryptophan", "Phenylalanine"))) %>%
ungroup() %>%
drop_na() %>%
# 1 when the relative abundance reaches the second stored optimal cutpoint,
# otherwise 0.
mutate(enterococcus_expansion = ifelse(enterococcus_rel_abundance >=
optimal_cutpoint_rel$optimal_cutpoint[2], 1, 0)) %>%
arrange(enterococcus_expansion, sampleID) %>%
group_by(compound) %>%
# Per-compound number of observations in each expansion group. The numeric
# flag is compared against the strings "0"/"1", which works through coercion.
mutate(enterococcus_expansion_0 = length(mvalue__mM[enterococcus_expansion ==
"0"]), enterococcus_expansion_1 = length(mvalue__mM[enterococcus_expansion ==
"1"])) %>%
# Keep only compounds with at least one non-zero value.
filter(any(mvalue__mM != 0)) %>%
# Replace the 0/1 flag with its display label.
mutate(enterococcus_expansion = ifelse(enterococcus_expansion ==
"1", "Expansion", "No Expansion"))
# Bracket height per compound: 10% above the largest observed value.
metab_boxplot_summary_stats_ecoc_expan <- summarise(
  group_by(metab_boxplot_ecoc_expan, compound),
  y.position = max(mvalue__mM) * 1.1
)

# Wilcoxon test of expansion vs. no expansion for every compound, with
# BH-adjusted p-values turned into display labels. The y.position computed by
# add_xy_position() is discarded in favour of the per-compound value above.
metab_boxplot_stats_ecoc_expan <-
  group_by(metab_boxplot_ecoc_expan, class, compound) %>%
  rstatix::wilcox_test(mvalue__mM ~ enterococcus_expansion) %>%
  rstatix::adjust_pvalue(method = "BH") %>%
  rstatix::add_significance("p.adj") %>%
  mutate(
    p.adj = ifelse(
      p.adj < 0.001,
      "p.adj < 0.001",
      paste("p.adj = ", round(p.adj, 3))
    )
  ) %>%
  add_xy_position(x = "enterococcus_expansion") %>%
  select(-y.position) %>%
  left_join(metab_boxplot_summary_stats_ecoc_expan)
# gg_metab_boxplot_ecoc_expan <-
# ggboxplot(metab_boxplot_ecoc_expan, x =
# 'enterococcus_expansion', y = 'mvalue__mM', fill =
# 'enterococcus_expansion', alpha = 0.65, outlier.shape =
# NA ) + theme(legend.text = et(size = 14, color =
# 'black'), legend.title = et(size = 14, color = 'black'),
# axis.title.x = eb(), axis.title.y = et(size = 16, color =
# 'black'), strip.text = et(size = 14, color = 'black'),
# strip.background = eb(), panel.border = eb(), axis.line =
# eb()) + facet_wrap(~compound, scales = 'free_y', nrow =
# 2) + annotate('segment', x = 0.35, xend = 0.35, y = 0,
# yend = Inf, colour = 'black', linewidth = 1) +
# annotate('segment', x = 0.35, xend = Inf, y = 0, yend =
# 0, colour = 'black') +
# stat_pvalue_manual(metab_boxplot_stats_ecoc_expan, label
# = '{p.adj}', bracket.size = 0.5) + geom_point(data =
# metab_boxplot_ecoc_expan, aes(x = enterococcus_expansion,
# y = mvalue__mM, fill = enterococcus_expansion), position
# = position_jitter(width = 0.2), size = 2, shape = 21,
# alpha = 0.65, color = 'black') +
# scale_fill_manual('Enterococcus', values = c('gray87',
# '#389458')) + scale_y_continuous(expand = expansion(mult
# = c(0.1, 0.2))) + ylab('Concentration (mM)\n')
# gg_metab_boxplot_ecoc_expan cairo_pdf(filename =
# './Results/Figure_5B.pdf', height = 8, width = 16,
# onefile = TRUE) gg_metab_boxplot_ecoc_expan
# invisible(dev.off())

# Sample-by-compound matrix of mean qualitative metabolite values, restricted
# to the samples listed in first_samps and the compounds in heatmap_cmpds.
# Row names are sampleIDs. Samples whose values are all NA are removed.
# The assignment below used to be fused into the comment line above, so the
# pipeline result was never stored in diversity_metab_mat.
diversity_metab_mat <-
  metab_qual_anon %>%
  filter(sampleID %in% first_samps$sampleID) %>%
  mutate(
    compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
    compound = str_to_title(compound)
  ) %>%
  filter(compound %in% heatmap_cmpds$compound | is.na(compound)) %>%
  ungroup() %>%
  left_join(metaphlan_df_sumry %>% select(sampleID, diversity_group_abv)) %>%
  group_by(sampleID, compound, diversity_group_abv) %>%
  summarise(mvalue = mean(mvalue, na.rm = TRUE)) %>%
  ungroup() %>%
  # mean() of an all-NA group yields NaN; store it as NA instead.
  mutate_all(~ replace(., is.nan(.), NA)) %>%
  select(sampleID, compound, mvalue, diversity_group_abv) %>%
  drop_na(sampleID) %>%
  pivot_wider(names_from = "compound", values_from = "mvalue") %>%
  filter(sampleID != "") %>%
  column_to_rownames(var = "sampleID") %>%
  # Rows with a missing compound name pivot into a column called "NA".
  # any_of() keeps this step from failing when no such column exists.
  select(-any_of(c("NA", "diversity_group_abv"))) %>%
  filter_all(any_vars(!is.na(.)))
# Diversity-group label for every row of diversity_metab_mat, looked up by
# sampleID in metaphlan_df_sumry, with unused factor levels dropped.
diversity_metab_labs <- pull(
  droplevels(
    left_join(
      rownames_to_column(diversity_metab_mat, var = "sampleID"),
      select(metaphlan_df_sumry, sampleID, diversity_group_abv)
    )
  ),
  diversity_group_abv
)
# 101 93 (means 93 compounds and 101 LT patients/infections)
dim(diversity_metab_mat)
## [1] 101 93
## [1] 101
# Begin model training
set.seed(1234)
# Randomly select 70% of the samples (rows) for training. seq_len() replaces
# 1:nrow(), which would yield c(1, 0) for an empty matrix.
diversity_train <- sample(
  seq_len(nrow(diversity_metab_mat)),
  as.integer(0.7 * nrow(diversity_metab_mat))
)
# The remaining rows form the test set.
diversity_test <- setdiff(seq_len(nrow(diversity_metab_mat)), diversity_train)
# Store matrices and labels for the training and test sets.
diversity_metab_mat.train <- diversity_metab_mat[diversity_train, ]
diversity_metab_mat.test <- diversity_metab_mat[diversity_test, ]
diversity_metab_labs.train <- diversity_metab_labs[diversity_train]
diversity_metab_labs.test <- diversity_metab_labs[diversity_test]
# Hyperparameter tuning, step 1: fit an exploratory 5-component model on the
# training set to decide how many components are worth keeping.
set.seed(1234)
diversity_train_splsda <- mixOmics::splsda(
  diversity_metab_mat.train,
  diversity_metab_labs.train,
  ncomp = 5
)
# Cross-validated performance: 5 folds, repeated 50 times.
set.seed(1234)
diversity_train_plsda_perf <- perf(
  diversity_train_splsda,
  nrepeat = 50,
  folds = 5,
  validation = "Mfold",
  auc = TRUE,
  progressBar = FALSE
)
plot(
  diversity_train_plsda_perf,
  auc = TRUE,
  sd = FALSE,
  col = color.mixo(5:7),
  legend.position = "horizontal"
)
# ncomp = 4 might be best for classification error rate and max.dist
# Number of optimal variables to select for each component
# Candidate numbers of variables to keep on each component.
# NOTE(review): the grid runs up to 130 although the dim() note above reports
# 93 compounds - confirm how tune.splsda() treats keepX values > ncol(X).
diversity_train_keepX <- c(1:10, seq(20, 130, 10))
set.seed(123)
diversity_train_tune_splsda <-
  mixOmics::tune.splsda(
    diversity_metab_mat.train,
    diversity_metab_labs.train,
    ncomp = 4, # Choose 4 components (max) to be safe
    validation = "Mfold",
    folds = 5,
    dist = "max.dist",
    progressBar = FALSE,
    auc = TRUE,
    measure = "BER",
    test.keepX = diversity_train_keepX,
    nrepeat = 50
  )
plot(diversity_train_tune_splsda, col = color.jet(4))

# The assignment below used to be fused onto the end of the plot() call,
# which is a syntax error.
diversity_train_error <- diversity_train_tune_splsda$error.rate
# Optimal number of components based on t-tests on the error rate.
diversity_train_ncomp <- diversity_train_tune_splsda$choice.ncomp$ncomp
diversity_train_ncomp # 1 component is optimal
## [1] 1
# Optimal number of variables to select per component. At least two
# components are always kept, even when tuning selects one (components 1 and
# 2 are plotted against each other further down).
diversity_train_select_keepX <-
  diversity_train_tune_splsda$choice.keepX[seq_len(max(diversity_train_ncomp, 2))]
diversity_train_select_keepX
## comp1 comp2
## 80 8
# Final Model
# Uses the tuned keepX and at least two components.
diversity_train_splsda_final <- mixOmics::splsda(
  diversity_metab_mat.train,
  diversity_metab_labs.train,
  ncomp = max(diversity_train_ncomp, 2),
  keepX = diversity_train_select_keepX
)
# Test the model
diversity_predict_train_splsda_final <- predict(
  diversity_train_splsda_final,
  diversity_metab_mat.test,
  dist = "max.dist"
)
# Predicted class of each test sample using all retained components.
diversity_predict_train_comp2 <-
  diversity_predict_train_splsda_final$class$max.dist[, max(diversity_train_ncomp, 2)]
diversity_union <- union(diversity_predict_train_comp2, diversity_metab_labs.test)
# Rows = predicted, columns = actual. The factors used to be built as
# factor(x, diversity_union, levels = ...). With `levels` named, the union
# binds to `labels`, so the classes were renamed in whatever order the union
# happened to have. Only `levels` is passed now.
confusionMatrix(
  table(
    factor(diversity_predict_train_comp2, levels = c("Low", "Medium", "High")),
    factor(diversity_metab_labs.test, levels = c("Low", "Medium", "High"))
  )
)
# NOTE(review): the output recorded below predates this fix. Its header order
# "Medium High Low" suggests the class names are permuted there (shown
# "Medium" = Low, "High" = Medium, "Low" = High); the counts are unaffected.
## Confusion Matrix and Statistics
##
##
## Medium High Low
## Medium 10 4 0
## High 3 6 3
## Low 0 0 5
##
## Overall Statistics
##
## Accuracy : 0.6774
## 95% CI : (0.4863, 0.8332)
## No Information Rate : 0.4194
## P-Value [Acc > NIR] : 0.003321
##
## Kappa : 0.4992
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Medium Class: High Class: Low
## Sensitivity 0.7692 0.6000 0.6250
## Specificity 0.7778 0.7143 1.0000
## Pos Pred Value 0.7143 0.5000 1.0000
## Neg Pred Value 0.8235 0.7895 0.8846
## Prevalence 0.4194 0.3226 0.2581
## Detection Rate 0.3226 0.1935 0.1613
## Detection Prevalence 0.4516 0.3871 0.1613
## Balanced Accuracy 0.7735 0.6571 0.8125
# Prediction background for the component 1 vs. 2 plot, computed over a fixed
# [-20, 20] window on both axes. Note that the background uses centroids.dist,
# while the class predictions in this section use max.dist.
diversity_train_background <- background.predict(
  diversity_train_splsda_final,
  comp.predicted = 2,
  dist = "centroids.dist",
  xlim = c(-20, 20),
  ylim = c(-20, 20)
)
# Model metrics for all samples
# The matrix passed to predict() includes the training rows, so these are not
# purely held-out metrics.
diversity_tot <- predict(
  diversity_train_splsda_final,
  diversity_metab_mat,
  dist = "max.dist"
)
diversity_tot_predict <-
  diversity_tot$class$max.dist[, max(diversity_train_ncomp, 2)]
diversity_tot_union <- union(diversity_tot_predict, diversity_metab_labs)
# Rows = predicted, columns = actual. The factors used to be built as
# factor(x, diversity_tot_union, levels = ...), which binds the union to
# `labels` and renames the classes in union order. Only `levels` is passed
# now, and the stray empty trailing argument has been removed.
diversity_cm <- confusionMatrix(
  table(
    factor(diversity_tot_predict, levels = c("Low", "Medium", "High")),
    factor(diversity_metab_labs, levels = c("Low", "Medium", "High"))
  )
)
diversity_cm
# NOTE(review): the output recorded below predates this fix. Its header order
# "Medium Low High" suggests "Low" and "Medium" are swapped there; the counts
# are unaffected.
## Confusion Matrix and Statistics
##
##
## Medium Low High
## Medium 30 11 2
## Low 6 27 6
## High 0 1 18
##
## Overall Statistics
##
## Accuracy : 0.7426
## 95% CI : (0.646, 0.8244)
## No Information Rate : 0.3861
## P-Value [Acc > NIR] : 3.744e-13
##
## Kappa : 0.6044
##
## Mcnemar's Test P-Value : 0.07057
##
## Statistics by Class:
##
## Class: Medium Class: Low Class: High
## Sensitivity 0.8333 0.6923 0.6923
## Specificity 0.8000 0.8065 0.9867
## Pos Pred Value 0.6977 0.6923 0.9474
## Neg Pred Value 0.8966 0.8065 0.9024
## Prevalence 0.3564 0.3861 0.2574
## Detection Rate 0.2970 0.2673 0.1782
## Detection Prevalence 0.4257 0.3861 0.1881
## Balanced Accuracy 0.8167 0.7494 0.8395
# Per-class metrics (index 1 = Low, 2 = Medium, 3 = High) for all samples.
diversity_epi <- mltest::ml_test(
  predicted = factor(diversity_tot_predict, levels = c("Low", "Medium", "High")),
  true = factor(diversity_metab_labs, levels = c("Low", "Medium", "High"))
)

# Confusion table relabelled by position for display, then transposed so that
# rows hold the actual classes and columns the predicted ones.
diversity_cm_names <- diversity_cm$table
rownames(diversity_cm_names) <- c("Predicted\nLow", "Predicted\nMedium", "Predicted\nHigh")
colnames(diversity_cm_names) <- c("Actual\nLow", "Actual\nMedium", "Actual\nHigh")
diversity_confusion_df <- t(diversity_cm_names)
# multiclass 95% CI
diversity_mc <-
  biostatUtil::multiClassCM(
    factor(diversity_metab_labs, levels = c("Low", "Medium", "High")),
    factor(diversity_tot_predict, levels = c("Low", "Medium", "High")),
    seed = 20,
    num.boot = 1000,
    conf.level = 0.95,
    digits = 2,
    method = "wilson"
  )

# Split each per-class cell into estimate / lower / upper columns and convert
# everything to numeric.
# Assumes each cell has four space-separated tokens, e.g. "est (lower - upper)";
# tokens 1, 2 and 4 are kept - TODO confirm against multiClassCM output.
diversity_mc_table <- diversity_mc$table %>%
  as.data.frame() %>%
  separate(., Low, into = c("X1", "X2", "X3", "X4"), sep = " ") %>%
  select(
    Average,
    "Low_Avg" = X1, "Low_Lower" = X2, "Low_Upper" = X4,
    Medium, High
  ) %>%
  separate(., Medium, into = c("X1", "X2", "X3", "X4"), sep = " ") %>%
  select(
    Average, Low_Avg, Low_Lower, Low_Upper,
    "Med_Avg" = X1, "Med_Lower" = X2, "Med_Upper" = X4,
    High
  ) %>%
  separate(., High, into = c("X1", "X2", "X3", "X4"), sep = " ") %>%
  select(
    Average, Low_Avg, Low_Lower, Low_Upper,
    Med_Avg, Med_Lower, Med_Upper,
    "High_Avg" = X1, "High_Lower" = X2, "High_Upper" = X4
  ) %>%
  # Strip the single parenthesis left on the lower/upper tokens. across()
  # replaces the deprecated funs() and the superseded mutate_all()/mutate_if().
  mutate(across(everything(), ~ str_replace(.x, "\\(|\\)", ""))) %>%
  mutate(across(where(is.character), as.numeric))
# Draw Figure 3A on the active graphics device: sPLS-DA sample plot
# (components 1 and 2) with the prediction background, the confusion table,
# and overall plus per-class metric annotations.
#
# model          Fitted sPLS-DA model (diversity_train_splsda_final).
# background     Result of background.predict() for `model`.
# confusion_tbl  Table drawn by addtable2plot().
# cm             caret confusionMatrix object; $overall[c(1, 3, 4)] are
#                printed as the accuracy and its interval bounds.
# class_metrics  mltest::ml_test() result; index 1 = Low, 2 = Medium, 3 = High.
#
# Called for its drawing side effect only. The text and table coordinates are
# hard-coded for the current data.
draw_diversity_splsda_figure <- function(model, background, confusion_tbl,
                                         cm, class_metrics) {
  scores <- model$variates$X
  expl_var <- model$prop_expl_var$X
  x_min <- min(scores[, 1])

  # "12.3%"-style formatting of a proportion.
  fmt_pct <- function(p) {
    paste0(formatC(round(p, 3) * 100, digits = 1, format = "f"), "%")
  }

  plotIndiv(
    model,
    xlim = c(x_min * 1.05, max(scores[, 1]) * 1.8),
    ylim = c(min(scores[, 2]) * 1.15, max(scores[, 2]) * 1.8),
    comp = c(1, 2),
    pch = 1,
    ind.names = FALSE,
    legend = FALSE,
    background = background,
    col = c("#3A001E", "#8A0246", "#C20463"),
    star = TRUE,
    point.lwd = 0.5,
    title = NULL,
    size.title = 0.00001,
    style = "graphics",
    legend.title = "Diversity Group",
    X.label = paste0("Component 1 (", round(expl_var[1] * 100), "%)"),
    Y.label = paste0("Component 2 (", round(expl_var[2] * 100), "%)")
  )

  addtable2plot(
    x = x_min * 1.1,
    y = -5.25,
    table = confusion_tbl,
    bty = "o",
    display.rownames = TRUE,
    hlines = TRUE,
    vlines = TRUE,
    cex = 0.75,
    bg = "white"
  )

  text(
    x = x_min * 1.1,
    y = -5.5,
    labels = paste0(
      "Overall ACC = ", fmt_pct(cm$overall[1]),
      " [", fmt_pct(cm$overall[3]), ", ", fmt_pct(cm$overall[4]), "]"
    ),
    cex = 0.75, adj = 0
  )

  # Per-class annotation blocks: three lines, 0.3 units apart, in the class
  # colour. The "ACC" line shows the balanced accuracy.
  # "Sens." now shows recall (sensitivity); it previously printed precision,
  # which is the positive predictive value.
  # Odds-ratio lines (class_metrics$DOR) were disabled in the original code
  # and are not drawn.
  class_annotations <- list(
    list(name = "Low", x = 1.5, y_top = 3, col = "#3A001E"),
    list(name = "Medium", x = 0, y_top = -4.7, col = "#8A0246"),
    list(name = "High", x = x_min * 1.05, y_top = 3, col = "#C20463")
  )
  for (k in seq_along(class_annotations)) {
    annot <- class_annotations[[k]]
    values <- c(
      class_metrics$balanced.accuracy[k],
      class_metrics$recall[k],
      class_metrics$specificity[k]
    )
    text(
      x = annot$x,
      y = annot$y_top - c(0, 0.3, 0.6),
      labels = paste0(
        annot$name, " Diversity ", c("ACC", "Sens.", "Spec."), " = ",
        round(values * 100, 1), "%"
      ),
      cex = 0.75, adj = 0, col = annot$col
    )
  }
  invisible(NULL)
}

# Write Figure 3A to PDF. The device is closed even if drawing fails.
pdf(file = "./Results/Figure_3A.pdf", height = 10, width = 10)
tryCatch(
  draw_diversity_splsda_figure(
    diversity_train_splsda_final,
    diversity_train_background,
    diversity_confusion_df,
    diversity_cm,
    diversity_epi
  ),
  finally = invisible(dev.off())
)

# Draw the same figure on the active device.
draw_diversity_splsda_figure(
  diversity_train_splsda_final,
  diversity_train_background,
  diversity_confusion_df,
  diversity_cm,
  diversity_epi
)

# The assignment target below used to be fused onto the end of the last
# text() call, which is a syntax error; its right-hand side follows.
ecoc_doms_metab_mat <-
metab_qual_anon %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
compound = str_to_title(compound)) %>%
filter(compound %in% heatmap_cmpds$compound|is.na(compound)) %>%
ungroup() %>%
group_by(sampleID, compound) %>%
summarise(mvalue = mean(mvalue, na.rm = TRUE)) %>%
ungroup() %>%
mutate_all(~replace(., is.nan(.), NA)) %>%
select(sampleID, compound, mvalue) %>%
drop_na(sampleID) %>%
pivot_wider(names_from = "compound", values_from = "mvalue") %>%
filter(sampleID != "") %>%
right_join(peri_matrix_all %>%
mutate(domination = case_when(enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2] #|
# enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1]
~ "Expansion",
TRUE ~ "No Expansion")) %>%
select(sampleID, domination)) %>%
select(-domination) %>%
column_to_rownames(var = "sampleID") %>%
select(-`NA`) %>%
filter_all(any_vars(!is.na(.)))
# Enterococcus expansion label for every row of ecoc_doms_metab_mat.
# "Expansion" when the relative abundance reaches the second stored optimal
# cutpoint. Everything else, including a missing abundance, falls through to
# "No Expansion".
# The former group_by(patientID) had no effect on this row-wise rule and only
# carried an extra column into the join; it is removed to match the
# Enterobacterales version of this step, and the join key is now explicit.
ecoc_doms_metab_labs <-
  ecoc_doms_metab_mat %>%
  rownames_to_column(var = "sampleID") %>%
  left_join(
    peri_matrix_all %>%
      mutate(
        domination = case_when(
          enterococcus_rel_abundance >=
            optimal_cutpoint_rel$optimal_cutpoint[2] ~ "Expansion",
          TRUE ~ "No Expansion"
        )
      ) %>%
      select(sampleID, domination),
    by = "sampleID"
  ) %>%
  pull(domination)
# 107 93 (means 93 compounds and 108 LT patients)
# NOTE(review): the note says 108 patients but the recorded output shows 107
# rows - confirm which is meant.
dim(ecoc_doms_metab_mat)
## [1] 107 93
## [1] 107
# Begin model training
set.seed(1234)
# Randomly select 70% of the samples (rows) for training. seq_len() replaces
# 1:nrow(), which would yield c(1, 0) for an empty matrix.
ecoc_doms_train <- sample(
  seq_len(nrow(ecoc_doms_metab_mat)),
  as.integer(0.7 * nrow(ecoc_doms_metab_mat))
)
# The remaining rows form the test set.
ecoc_doms_test <- setdiff(seq_len(nrow(ecoc_doms_metab_mat)), ecoc_doms_train)
# Store matrices and labels for the training and test sets.
ecoc_doms_metab_mat.train <- ecoc_doms_metab_mat[ecoc_doms_train, ]
ecoc_doms_metab_mat.test <- ecoc_doms_metab_mat[ecoc_doms_test, ]
ecoc_doms_metab_labs.train <- ecoc_doms_metab_labs[ecoc_doms_train]
ecoc_doms_metab_labs.test <- ecoc_doms_metab_labs[ecoc_doms_test]
# Hyperparameter tuning, step 1: fit an exploratory 5-component model on the
# training set to decide how many components are worth keeping.
set.seed(1234)
ecoc_doms_train_splsda <- mixOmics::splsda(
  ecoc_doms_metab_mat.train,
  ecoc_doms_metab_labs.train,
  ncomp = 5
)
# Cross-validated performance: 5 folds, repeated 50 times.
set.seed(1234)
ecoc_doms_train_plsda_perf <- perf(
  ecoc_doms_train_splsda,
  nrepeat = 50,
  folds = 5,
  validation = "Mfold",
  auc = TRUE,
  progressBar = FALSE
)
plot(
  ecoc_doms_train_plsda_perf,
  auc = TRUE,
  sd = FALSE,
  col = color.mixo(5:7),
  legend.position = "horizontal"
)
# ncomp = 1 or 4 is best for classification error rate and max.dist
# Number of optimal variables to select for each component
# Candidate numbers of variables to keep on each component (1-10, then
# 20-100 in steps of 10).
# NOTE(review): the dim() note above reports 93 compounds, so the value 100
# exceeds the number of columns - confirm how tune.splsda() treats it.
ecoc_doms_train_keepX <- c(1:10, seq(20, 108, 10))
set.seed(123)
ecoc_doms_train_tune_splsda <-
  mixOmics::tune.splsda(
    ecoc_doms_metab_mat.train,
    ecoc_doms_metab_labs.train,
    ncomp = 4, # Choose 4 components (max) to be safe
    validation = "Mfold",
    folds = 5,
    dist = "max.dist",
    progressBar = FALSE,
    auc = TRUE,
    measure = "BER",
    test.keepX = ecoc_doms_train_keepX,
    nrepeat = 50
  )
plot(ecoc_doms_train_tune_splsda, col = color.jet(4))

# The assignment below used to be fused onto the end of the plot() call,
# which is a syntax error.
ecoc_doms_train_error <- ecoc_doms_train_tune_splsda$error.rate
# Optimal number of components based on t-tests on the error rate.
ecoc_doms_train_ncomp <- ecoc_doms_train_tune_splsda$choice.ncomp$ncomp
# ecoc_doms_train_ncomp = 4 #4 components are optimal via visual inspection
# Optimal number of variables to select per component. At least two
# components are always kept, even when tuning selects one (components 1 and
# 2 are plotted against each other further down).
ecoc_doms_train_select_keepX <-
  ecoc_doms_train_tune_splsda$choice.keepX[seq_len(max(ecoc_doms_train_ncomp, 2))]
ecoc_doms_train_select_keepX
## comp1 comp2
## 1 20
# Final Model
# Uses the tuned keepX and at least two components.
ecoc_doms_train_splsda_final <- mixOmics::splsda(
  ecoc_doms_metab_mat.train,
  ecoc_doms_metab_labs.train,
  ncomp = max(ecoc_doms_train_ncomp, 2),
  keepX = ecoc_doms_train_select_keepX
)
# Test the model
ecoc_doms_predict_train_splsda_final <- predict(
  ecoc_doms_train_splsda_final,
  ecoc_doms_metab_mat.test,
  dist = "max.dist"
)
# Predicted class of each test sample using all retained components.
ecoc_doms_predict_train_comp2 <-
  ecoc_doms_predict_train_splsda_final$class$max.dist[, max(ecoc_doms_train_ncomp, 2)]
ecoc_doms_union <- union(ecoc_doms_predict_train_comp2, ecoc_doms_metab_labs.test)
# Rows = predicted, columns = actual. The level order is fixed rather than
# taken from the data-dependent union, and the positive class is set to
# "Expansion" to match the all-sample confusion matrix computed further down.
# The former `negative = "0"` is not a confusionMatrix() argument and no class
# is called "0"; it was silently ignored, leaving the first level as positive.
confusionMatrix(
  table(
    factor(ecoc_doms_predict_train_comp2, levels = c("No Expansion", "Expansion")),
    factor(ecoc_doms_metab_labs.test, levels = c("No Expansion", "Expansion"))
  ),
  positive = "Expansion"
)
# NOTE(review): the output recorded below predates this change and reports
# "No Expansion" as the positive class, so its sensitivity/specificity and
# predictive values are swapped relative to what this call now prints.
## Confusion Matrix and Statistics
##
##
## No Expansion Expansion
## No Expansion 16 5
## Expansion 3 9
##
## Accuracy : 0.7576
## 95% CI : (0.5774, 0.8891)
## No Information Rate : 0.5758
## P-Value [Acc > NIR] : 0.02392
##
## Kappa : 0.4943
##
## Mcnemar's Test P-Value : 0.72367
##
## Sensitivity : 0.8421
## Specificity : 0.6429
## Pos Pred Value : 0.7619
## Neg Pred Value : 0.7500
## Prevalence : 0.5758
## Detection Rate : 0.4848
## Detection Prevalence : 0.6364
## Balanced Accuracy : 0.7425
##
## 'Positive' Class : No Expansion
##
# Prediction background for the component 1 vs. 2 plot, computed over a fixed
# [-20, 20] window on both axes. Note that the background uses centroids.dist,
# while the class predictions in this section use max.dist.
ecoc_doms_train_background <- background.predict(
  ecoc_doms_train_splsda_final,
  comp.predicted = 2,
  dist = "centroids.dist",
  xlim = c(-20, 20),
  ylim = c(-20, 20)
)
# Model metrics for all samples
# The matrix passed to predict() includes the training rows, so these are not
# purely held-out metrics.
ecoc_doms_tot <- predict(
  ecoc_doms_train_splsda_final,
  ecoc_doms_metab_mat,
  dist = "max.dist"
)
ecoc_doms_tot_predict <-
  ecoc_doms_tot$class$max.dist[, max(ecoc_doms_train_ncomp, 2)]
ecoc_doms_tot_union <- union(ecoc_doms_tot_predict, ecoc_doms_metab_labs)
# Rows = predicted, columns = actual. The factors used to be built as
# factor(x, ecoc_doms_tot_union, levels = ...), which binds the union to
# `labels`. Whenever the union order differs from the level order, that
# swaps the two class names. Only `levels` is passed now.
ecoc_doms_cm <- confusionMatrix(
  table(
    factor(ecoc_doms_tot_predict, levels = c("No Expansion", "Expansion")),
    factor(ecoc_doms_metab_labs, levels = c("No Expansion", "Expansion"))
  ),
  positive = "Expansion"
)
ecoc_doms_cm
## Confusion Matrix and Statistics
##
##
## No Expansion Expansion
## No Expansion 59 14
## Expansion 7 27
##
## Accuracy : 0.8037
## 95% CI : (0.7158, 0.8742)
## No Information Rate : 0.6168
## P-Value [Acc > NIR] : 2.546e-05
##
## Kappa : 0.5709
##
## Mcnemar's Test P-Value : 0.1904
##
## Sensitivity : 0.6585
## Specificity : 0.8939
## Pos Pred Value : 0.7941
## Neg Pred Value : 0.8082
## Prevalence : 0.3832
## Detection Rate : 0.2523
## Detection Prevalence : 0.3178
## Balanced Accuracy : 0.7762
##
## 'Positive' Class : Expansion
##
# Diagnostic-test statistics with 95% confidence intervals from the 2x2 table
# of predicted vs. actual labels.
# NOTE(review): table() orders character values alphabetically, so "Expansion"
# comes first and is presumably treated as test/outcome positive by
# epi.tests() - confirm.
ecoc_doms_epi <- epiR::epi.tests(
  table(ecoc_doms_tot_predict, ecoc_doms_metab_labs),
  conf.level = 0.95
)

# Display version of the 2x2 table: transposed so that rows are the actual
# classes and columns the predicted ones, with readable row/column labels.
ecoc_doms_confusion_df <- column_to_rownames(
  dplyr::rename(
    mutate(
      rownames_to_column(as.data.frame(t(ecoc_doms_epi$tab)), var = "actual"),
      actual = case_when(
        grepl(actual, pattern = "+", fixed = TRUE) ~ "Actual\nExpansion",
        grepl(actual, pattern = "-", fixed = TRUE) ~ "Actual\nNo Expansion",
        TRUE ~ "Total"
      )
    ),
    "Predicted\nExpansion" = "Test +",
    "Predicted\nNo Expansion" = "Test -"
  ),
  var = "actual"
)
# Draw Figure 6A on the active graphics device: sPLS-DA sample plot
# (components 1 and 2) with the prediction background, the confusion table,
# class captions and accuracy / sensitivity / specificity / odds-ratio text.
#
# model          Fitted sPLS-DA model (ecoc_doms_train_splsda_final).
# background     Result of background.predict() for `model`.
# confusion_tbl  Table drawn by addtable2plot().
# cm             caret confusionMatrix object; $overall[c(1, 3, 4)] are
#                printed as the accuracy and its interval bounds.
# epi            epiR::epi.tests() result; columns 2-4 of epi$detail are
#                printed as estimate, lower and upper bound.
#
# Called for its drawing side effect only. The caption coordinates are
# hard-coded for the current data.
draw_ecoc_doms_splsda_figure <- function(model, background, confusion_tbl,
                                         cm, epi) {
  scores <- model$variates$X
  expl_var <- model$prop_expl_var$X
  x_min <- min(scores[, 1])
  y_min <- min(scores[, 2])
  stats_x <- max(scores[, 1]) * 0.25

  # Columns 2, 3 and 4 of one row of epi$detail.
  detail_row <- function(row) {
    vapply(2:4, function(j) epi$detail[[j]][row], numeric(1))
  }
  fmt_num <- function(v) formatC(round(v, 3), digits = 1, format = "f")
  fmt_pct <- function(p) {
    paste0(formatC(round(p, 3) * 100, digits = 1, format = "f"), "%")
  }

  plotIndiv(
    model,
    comp = c(1, 2),
    pch = 1,
    ind.names = FALSE,
    legend = FALSE,
    background = background,
    col = c("#0C7A3A", "black"),
    star = TRUE,
    point.lwd = 0.5,
    title = NULL,
    size.title = 0.00001,
    style = "graphics",
    legend.title = "Expansion",
    X.label = paste0("Component 1 (", round(expl_var[1] * 100), "%)"),
    Y.label = paste0("Component 2 (", round(expl_var[2] * 100), "%)")
  )

  addtable2plot(
    x = x_min * 1.15,
    y = y_min * 1.05,
    table = confusion_tbl,
    bty = "o",
    display.rownames = TRUE,
    hlines = TRUE,
    vlines = TRUE,
    cex = 0.75,
    bg = "white"
  )

  # Class captions.
  text(
    x = 1.5, y = -3.5,
    substitute(bold("Expansion")),
    cex = 1.75, adj = 0, col = "#0C7A3A"
  )
  text(
    x = 0.75, y = 3,
    substitute(bold("No Expansion")),
    cex = 1.75, adj = 0, col = "black"
  )

  text(
    x = stats_x, y = y_min * 0.825,
    labels = paste0(
      "ACC = ",
      round(cm$overall[1] * 100, 1), "% [",
      round(cm$overall[3] * 100, 1), "%, ",
      round(cm$overall[4] * 100, 1), "%]"
    ),
    cex = 0.75, adj = 0
  )

  # NOTE(review): rows 3, 4 and 6 of epi$detail are taken to be sensitivity,
  # specificity and the diagnostic odds ratio - confirm against the installed
  # epiR version.
  sens <- detail_row(3)
  spec <- detail_row(4)
  odds <- detail_row(6)
  text(
    x = stats_x, y = y_min * 0.875,
    labels = paste0(
      "Sens. = ", fmt_pct(sens[1]),
      " [", fmt_pct(sens[2]), ", ", fmt_pct(sens[3]), "]"
    ),
    cex = 0.75, adj = 0
  )
  text(
    x = stats_x, y = y_min * 0.925,
    labels = paste0(
      "Spec. = ", fmt_pct(spec[1]),
      " [", fmt_pct(spec[2]), ", ", fmt_pct(spec[3]), "]"
    ),
    cex = 0.75, adj = 0
  )
  text(
    x = stats_x, y = y_min * 0.975,
    labels = paste0(
      "OR = ", fmt_num(odds[1]),
      " [", fmt_num(odds[2]), ", ", fmt_num(odds[3]), "]"
    ),
    cex = 0.75, adj = 0
  )
  invisible(NULL)
}

# Write Figure 6A to PDF. The device is closed even if drawing fails.
pdf(file = "./Results/Figure_6A.pdf", height = 10, width = 10)
tryCatch(
  draw_ecoc_doms_splsda_figure(
    ecoc_doms_train_splsda_final,
    ecoc_doms_train_background,
    ecoc_doms_confusion_df,
    ecoc_doms_cm,
    ecoc_doms_epi
  ),
  finally = invisible(dev.off())
)

# Draw the same figure on the active device.
draw_ecoc_doms_splsda_figure(
  ecoc_doms_train_splsda_final,
  ecoc_doms_train_background,
  ecoc_doms_confusion_df,
  ecoc_doms_cm,
  ecoc_doms_epi
)

# The assignment target below used to be fused onto the end of the last
# text() call, which is a syntax error; its right-hand side follows.
ebac_doms_metab_mat <-
metab_qual_anon %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
compound = str_to_title(compound)) %>%
filter(compound %in% heatmap_cmpds$compound|is.na(compound)) %>%
ungroup() %>%
group_by(sampleID, compound) %>%
summarise(mvalue = mean(mvalue, na.rm = TRUE)) %>%
ungroup() %>%
mutate_all(~replace(., is.nan(.), NA)) %>%
select(sampleID, compound, mvalue) %>%
drop_na(sampleID) %>%
pivot_wider(names_from = "compound", values_from = "mvalue") %>%
filter(sampleID != "") %>%
right_join(peri_matrix_all %>%
mutate(domination = case_when(
#enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2] #|
enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1]
~ "Expansion",
TRUE ~ "No Expansion")) %>%
select(sampleID, domination)) %>%
select(-domination) %>%
column_to_rownames(var = "sampleID") %>%
select(-`NA`) %>%
filter_all(any_vars(!is.na(.)))
ebac_doms_metab_labs <-
ebac_doms_metab_mat %>%
rownames_to_column(var = "sampleID") %>%
left_join(peri_matrix_all %>%
mutate(domination = case_when(
# enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2] #|
enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1]
~ "Expansion",
TRUE ~ "No Expansion")) %>%
select(sampleID, domination)) %>%
pull(domination)
dim(ebac_doms_metab_mat) #107 93 (means 93 compounds and 108 LT patients)## [1] 107 93
## [1] 107
# Begin model training
# Random 70/30 split of the rows of ebac_doms_metab_mat; the seed fixes the draw.
set.seed(1234)
# seq_len() instead of 1:nrow(): 1:0 would yield c(1, 0) for an empty matrix.
ebac_doms_train <- sample(seq_len(nrow(ebac_doms_metab_mat)), as.integer(0.7 * nrow(ebac_doms_metab_mat))) # randomly select 70% samples in training
ebac_doms_test <- setdiff(seq_len(nrow(ebac_doms_metab_mat)), ebac_doms_train) # rest is part of the test set
# store matrices into training and test set:
ebac_doms_metab_mat.train <- ebac_doms_metab_mat[ebac_doms_train, ]
ebac_doms_metab_mat.test <- ebac_doms_metab_mat[ebac_doms_test, ]
ebac_doms_metab_labs.train <- ebac_doms_metab_labs[ebac_doms_train]
ebac_doms_metab_labs.test <- ebac_doms_metab_labs[ebac_doms_test]
# Hyperparameter tuning, step 1: fit a 5-component model on the training set
# and use cross-validation to judge how many components are worth keeping.
set.seed(1234)
ebac_doms_train_splsda <- mixOmics::splsda(
  X = ebac_doms_metab_mat.train,
  Y = ebac_doms_metab_labs.train,
  ncomp = 5
)

# Performance assessment: 5-fold cross-validation repeated 50 times
set.seed(1234)
ebac_doms_train_plsda_perf <- perf(
  ebac_doms_train_splsda,
  validation = "Mfold",
  folds = 5,
  nrepeat = 50,
  auc = TRUE,
  progressBar = FALSE
)

plot(
  ebac_doms_train_plsda_perf,
  col = color.mixo(5:7),
  sd = FALSE,
  auc = TRUE,
  legend.position = "horizontal"
)
# ncomp = 2 or 4 is best for classification error rate and max.dist
# Number of optimal variables to select for each component
# Candidate numbers of variables to keep per component: 1..10, then 20..100 in
# steps of 10.
# NOTE(review): the largest candidate (100) exceeds the 93 compounds reported by
# dim(ebac_doms_metab_mat) in the recorded run - confirm this is intended.
ebac_doms_train_keepX <- c(1:10, seq(20, 108, 10))
set.seed(123)
ebac_doms_train_tune_splsda <-
  mixOmics::tune.splsda(
    ebac_doms_metab_mat.train,
    ebac_doms_metab_labs.train,
    ncomp = 4, # Choose 4 components (max) to be safe
    validation = 'Mfold',
    folds = 5,
    dist = 'max.dist',
    progressBar = FALSE,
    auc = TRUE,
    measure = "BER",
    test.keepX = ebac_doms_train_keepX,
    nrepeat = 50
  )
plot(ebac_doms_train_tune_splsda, col = color.jet(4))
# The next assignment used to be fused onto the plot() call above, which is a
# parse error; it is now its own statement.
ebac_doms_train_error <- ebac_doms_train_tune_splsda$error.rate
ebac_doms_train_ncomp <- ebac_doms_train_tune_splsda$choice.ncomp$ncomp # optimal number of components based on t-tests on the error rate
ebac_doms_train_ncomp # 1 component is optimal in the recorded run
## [1] 1
# Always keep at least two components' worth of keepX values, even when tuning
# selects one. max(n, 2) equals the former ifelse(n == 1, n + 1, n) for n >= 1.
ebac_doms_train_select_keepX <- ebac_doms_train_tune_splsda$choice.keepX[seq_len(max(ebac_doms_train_ncomp, 2))] # optimal number of variables to select per component
ebac_doms_train_select_keepX
## comp1 comp2
## 1 10
# Final Model
# Fitted on the training set with the tuned keepX values. At least two
# components are used even when tuning selects one (presumably so that
# components 1 and 2 can be plotted against each other later).
# max(n, 2) equals the former scalar ifelse(n == 1, n + 1, n) for n >= 1.
ebac_doms_train_splsda_final <-
  mixOmics::splsda(
    ebac_doms_metab_mat.train,
    ebac_doms_metab_labs.train,
    ncomp = max(ebac_doms_train_ncomp, 2),
    keepX = ebac_doms_train_select_keepX
  )
# Test the model
ebac_doms_predict_train_splsda_final <- predict(ebac_doms_train_splsda_final, ebac_doms_metab_mat.test,
                                                dist = "max.dist")
# Predicted class using all fitted components (last column of the class matrix)
ebac_doms_predict_train_comp2 <- ebac_doms_predict_train_splsda_final$class$max.dist[, max(ebac_doms_train_ncomp, 2)]
# Shared level set so the table is square even if one class is never predicted
ebac_doms_union <- union(ebac_doms_predict_train_comp2, ebac_doms_metab_labs.test)
# `negative = "0"` was passed here before; confusionMatrix() has no such
# argument, so it was silently ignored. The positive class is now explicit,
# matching the recorded output below.
confusionMatrix(
  table(
    factor(ebac_doms_predict_train_comp2, levels = ebac_doms_union),
    factor(ebac_doms_metab_labs.test, levels = ebac_doms_union)
  ),
  positive = "Expansion"
)
## Confusion Matrix and Statistics
##
##
## Expansion No Expansion
## Expansion 1 1
## No Expansion 6 25
##
## Accuracy : 0.7879
## 95% CI : (0.6109, 0.9102)
## No Information Rate : 0.7879
## P-Value [Acc > NIR] : 0.5994
##
## Kappa : 0.1413
##
## Mcnemar's Test P-Value : 0.1306
##
## Sensitivity : 0.14286
## Specificity : 0.96154
## Pos Pred Value : 0.50000
## Neg Pred Value : 0.80645
## Prevalence : 0.21212
## Detection Rate : 0.03030
## Detection Prevalence : 0.06061
## Balanced Accuracy : 0.55220
##
## 'Positive' Class : Expansion
##
# Prediction background for the score plot, over a fixed -20..20 grid.
# NOTE(review): the background uses "centroids.dist" while the class
# predictions below use "max.dist" - confirm the mismatch is intended.
ebac_doms_train_background <- background.predict(ebac_doms_train_splsda_final,
                                                 comp.predicted = 2,
                                                 xlim = c(-20, 20),
                                                 ylim = c(-20, 20),
                                                 dist = "centroids.dist")
# Model metrics for all samples (training and test rows together)
ebac_doms_tot <- predict(ebac_doms_train_splsda_final,
                         ebac_doms_metab_mat,
                         dist = "max.dist")
# max(n, 2) equals the former scalar ifelse(n == 1, n + 1, n) for n >= 1
ebac_doms_tot_predict <- ebac_doms_tot$class$max.dist[, max(ebac_doms_train_ncomp, 2)]
ebac_doms_tot_union <- union(ebac_doms_tot_predict, ebac_doms_metab_labs)
# Bug fix: the factors were built as factor(x, ebac_doms_tot_union, levels = ...).
# With `levels` named, the positional union vector is matched to `labels`, so
# the classes were renamed in union order. In the recorded run that swapped
# "Expansion" and "No Expansion" (prevalence printed as 0.74 versus 0.21 in
# the test set). Only `levels` is passed now.
ebac_doms_cm <- confusionMatrix(
  table(
    factor(ebac_doms_tot_predict, levels = c("No Expansion", "Expansion")),
    factor(ebac_doms_metab_labs, levels = c("No Expansion", "Expansion"))
  ),
  positive = "Expansion"
)
ebac_doms_cm
# The printout recorded from the earlier run is not reproduced here because it
# was generated with the swapped class labels; re-knit to refresh it.
# Accuracy (0.8037; 95% CI 0.7158-0.8742) is unaffected, since both factors
# were relabelled identically.
# Additional model measures (sensitivity, specificity, odds ratio, ...) with
# 95% confidence limits, from the predicted-by-actual table of all samples.
ebac_doms_epi <- epiR::epi.tests(
  table(ebac_doms_tot_predict, ebac_doms_metab_labs),
  conf.level = 0.95
)

# 2x2 table (plus totals) relabelled for display on the figure: rows are the
# actual class, columns the predicted class.
ebac_doms_confusion_df <-
  as.data.frame(t(ebac_doms_epi$tab)) %>%
  rownames_to_column(var = "actual") %>%
  mutate(
    actual = case_when(
      grepl("+", actual, fixed = TRUE) ~ "Actual\nExpansion",
      grepl("-", actual, fixed = TRUE) ~ "Actual\nNo Expansion",
      TRUE ~ "Total"
    )
  ) %>%
  dplyr::rename(
    "Predicted\nExpansion" = "Test +",
    "Predicted\nNo Expansion" = "Test -"
  ) %>%
  column_to_rownames(var = "actual")
# Draws Figure 6C on the current graphics device: the Enterobacterales sPLS-DA
# score plot (components 1 and 2) with prediction background, the confusion
# table, class labels and the accuracy / sensitivity / specificity / odds-ratio
# annotations. Reads the ebac_doms_* objects from the global environment.
# The same drawing code used to be written out twice (PDF and screen).
.draw_figure_6c <- function() {
  scores <- ebac_doms_train_splsda_final$variates$X
  expl_var <- ebac_doms_train_splsda_final$prop_expl_var$X
  min_x <- min(scores[, 1])
  min_y <- min(scores[, 2])
  acc <- ebac_doms_cm$overall
  det <- ebac_doms_epi$detail
  # Formatters: proportion -> "12.3%", and plain number with one decimal
  pct <- function(v) paste0(formatC(round(v, 3) * 100, digits = 1, format = "f"), "%")
  num <- function(v) formatC(round(v, 3), digits = 1, format = "f")

  plotIndiv(
    ebac_doms_train_splsda_final,
    comp = c(1, 2),
    ylim = c(-5, 16),
    pch = 1,
    ind.names = FALSE,
    legend = FALSE,
    background = ebac_doms_train_background,
    col = c("#FF0000", "black"),
    star = TRUE,
    point.lwd = 0.5,
    title = NULL,
    size.title = 0.00001,
    style = "graphics",
    legend.title = "Expansion",
    X.label = paste0("Component 1 (", round(expl_var[1] * 100), "%)"),
    Y.label = paste0("Component 2 (", round(expl_var[2] * 100), "%)")
  )
  # Table and annotation positions are multiples of the smallest scores
  addtable2plot(
    x = min_x * -10,
    y = min_y * -3,
    table = ebac_doms_confusion_df,
    bty = "o",
    display.rownames = TRUE,
    hlines = TRUE,
    vlines = TRUE,
    cex = 0.75,
    bg = "white"
  )
  text(x = 1.25, y = 12, quote(bold("Expansion")), cex = 1.75, adj = 0, col = "#FF0000")
  text(x = 0.5, y = -4, quote(bold("No Expansion")), cex = 1.75, adj = 0, col = "black")
  # Four metric lines, top to bottom: ACC, Sens., Spec., OR, each as
  # "estimate [lower, upper]". Rows 3, 4 and 6 of the epi.tests detail table
  # hold sensitivity, specificity and the odds ratio.
  text(
    x = min_x * -12,
    y = min_y * c(-2.25, -2.05, -1.85, -1.65),
    labels = c(
      paste0("ACC = ", round(acc[1] * 100, 1), "% [",
             round(acc[3] * 100, 1), "%, ", round(acc[4] * 100, 1), "%]"),
      paste0("Sens. = ", pct(det[2][3, ]), " [", pct(det[3][3, ]), ", ", pct(det[4][3, ]), "]"),
      paste0("Spec. = ", pct(det[2][4, ]), " [", pct(det[3][4, ]), ", ", pct(det[4][4, ]), "]"),
      paste0("OR = ", num(det[2][6, ]), " [", num(det[3][6, ]), ", ", num(det[4][6, ]), "]")
    ),
    cex = 0.75,
    adj = 0
  )
  invisible(NULL)
}

# Save Figure 6C; `finally` closes the PDF device even if drawing fails.
{
  pdf(file = "./Results/Figure_6C.pdf", height = 10, width = 10)
  invisible(tryCatch(.draw_figure_6c(), finally = dev.off()))
}
# Same figure on the active device (e.g. the knitted document)
.draw_figure_6c()

# ---- Bacterial infection: metabolite matrix ----
# (This assignment used to be fused onto the closing parenthesis of the last
# text() call of the figure, which made the file unparseable.)
# Sample-by-compound matrix of mean metabolite values for the samples listed in
# peri_matrix_all (row names = sampleID).
infx_metab_mat <-
  metab_qual_anon %>%
  mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
         compound = str_to_title(compound)) %>%
  filter(compound %in% heatmap_cmpds$compound | is.na(compound)) %>%
  ungroup() %>%
  right_join(peri_matrix_all %>% select(sampleID, bact_infection_present)) %>%
  group_by(sampleID, compound, bact_infection_present) %>%
  summarise(mvalue = mean(mvalue, na.rm = TRUE)) %>%
  ungroup() %>%
  # mean() of an all-NA group is NaN; turn those back into NA
  mutate_all(~replace(., is.nan(.), NA)) %>%
  select(sampleID, compound, mvalue, bact_infection_present) %>%
  drop_na(sampleID) %>%
  pivot_wider(names_from = "compound", values_from = "mvalue") %>%
  filter(sampleID != "") %>%
  # The infection flag is not a predictor. It used to be recoded to
  # "Infection" / "No Infection" immediately before being dropped; that dead
  # step was removed.
  select(-bact_infection_present) %>%
  column_to_rownames(var = "sampleID") %>%
  # Column created by pivot_wider() for rows whose compound is NA
  select(-`NA`) %>%
  # Drop samples with no measured compound at all
  filter_all(any_vars(!is.na(.)))
# Class label per row of infx_metab_mat, in the same row order.
# Values of bact_infection_present containing "No" become "No Infection";
# everything else becomes "Infection". A missing value also becomes
# "Infection", because grepl() returns FALSE for NA.
infx_metab_labs <-
  infx_metab_mat %>%
  rownames_to_column("sampleID") %>%
  left_join(select(peri_matrix_all, sampleID, bact_infection_present)) %>%
  mutate(
    bact_infection_present = ifelse(
      grepl("No", bact_infection_present),
      "No Infection",
      "Infection"
    )
  ) %>%
  pull(bact_infection_present)
# Recorded run: 107 samples (rows) x 93 compounds (columns)
dim(infx_metab_mat) #107 93 (means 93 compounds and 107 LT patients/infections)
## [1] 107 93
## [1] 107
# Begin model training
# Random 70/30 split of the rows of infx_metab_mat; the seed fixes the draw.
set.seed(1234)
# seq_len() instead of 1:nrow(): 1:0 would yield c(1, 0) for an empty matrix.
infx_train <- sample(seq_len(nrow(infx_metab_mat)), as.integer(0.7 * nrow(infx_metab_mat))) # randomly select 70% samples in training
infx_test <- setdiff(seq_len(nrow(infx_metab_mat)), infx_train) # rest is part of the test set
# store matrices into training and test set:
infx_metab_mat.train <- infx_metab_mat[infx_train, ]
infx_metab_mat.test <- infx_metab_mat[infx_test, ]
infx_metab_labs.train <- infx_metab_labs[infx_train]
infx_metab_labs.test <- infx_metab_labs[infx_test]
# Hyperparameter tuning, step 1: fit a 5-component model on the training set
# and use cross-validation to judge how many components are worth keeping.
set.seed(1234)
infx_train_splsda <- mixOmics::splsda(
  X = infx_metab_mat.train,
  Y = infx_metab_labs.train,
  ncomp = 5
)

# Performance assessment: 5-fold cross-validation repeated 50 times
# (nrepeat = 50 below)
set.seed(1234)
infx_train_plsda_perf <- perf(
  infx_train_splsda,
  validation = "Mfold",
  folds = 5,
  nrepeat = 50,
  auc = TRUE,
  progressBar = FALSE
)

plot(
  infx_train_plsda_perf,
  col = color.mixo(5:7),
  sd = FALSE,
  auc = TRUE,
  legend.position = "horizontal"
)
# ncomp = 1 is best for classification error rate and max.dist
# Number of optimal variables to select for each component
# Candidate numbers of variables to keep per component: 1..10, then 20..100 in
# steps of 10.
# NOTE(review): the largest candidate (100) exceeds the 93 compounds reported by
# dim(infx_metab_mat) in the recorded run - confirm this is intended.
infx_train_keepX <- c(1:10, seq(20, 108, 10))
set.seed(123)
infx_train_tune_splsda <-
  mixOmics::tune.splsda(
    infx_metab_mat.train,
    infx_metab_labs.train,
    ncomp = 3, # Choose 3 components (max) to be safe
    validation = 'Mfold',
    folds = 5,
    dist = 'max.dist',
    progressBar = FALSE,
    auc = TRUE,
    measure = "BER",
    test.keepX = infx_train_keepX,
    nrepeat = 50
  )
plot(infx_train_tune_splsda, col = color.jet(3))
# The next assignment used to be fused onto the plot() call above, which is a
# parse error; it is now its own statement.
infx_train_error <- infx_train_tune_splsda$error.rate
infx_train_ncomp <- infx_train_tune_splsda$choice.ncomp$ncomp # optimal number of components based on t-tests on the error rate
infx_train_ncomp # 1 component is optimal in the recorded run
## [1] 1
# One more component than the tuned optimum is always kept here.
# NOTE(review): if tuning ever selects all 3 components, this index runs past
# the 3 tuned keepX values - the Enterobacterales model instead uses "at least
# two"; confirm which rule is intended.
infx_train_select_keepX <- infx_train_tune_splsda$choice.keepX[seq_len(infx_train_ncomp + 1)] # optimal number of variables to select per component
infx_train_select_keepX
## comp1 comp2
## 90 3
# Final model: refit on the training set with the tuned keepX values and one
# component more than the tuned optimum.
infx_train_splsda_final <- mixOmics::splsda(
  X = infx_metab_mat.train,
  Y = infx_metab_labs.train,
  ncomp = infx_train_ncomp + 1,
  keepX = infx_train_select_keepX
)

# Evaluate on the held-out test samples
infx_predict_train_splsda_final <- predict(
  infx_train_splsda_final,
  infx_metab_mat.test,
  dist = "max.dist"
)
# Predicted class using all fitted components (last column of the class matrix)
infx_predict_train_comp2 <- infx_predict_train_splsda_final$class$max.dist[, infx_train_ncomp + 1]
# Shared level set so the table is square even if one class is never predicted
infx_union <- union(infx_predict_train_comp2, infx_metab_labs.test)
confusionMatrix(
  table(
    factor(infx_predict_train_comp2, levels = infx_union),
    factor(infx_metab_labs.test, levels = infx_union)
  ),
  positive = "Infection"
)
## Confusion Matrix and Statistics
##
##
## No Infection Infection
## No Infection 20 6
## Infection 6 1
##
## Accuracy : 0.6364
## 95% CI : (0.4512, 0.796)
## No Information Rate : 0.7879
## P-Value [Acc > NIR] : 0.9865
##
## Kappa : -0.0879
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.1429
## Specificity : 0.7692
## Pos Pred Value : 0.1429
## Neg Pred Value : 0.7692
## Prevalence : 0.2121
## Detection Rate : 0.0303
## Detection Prevalence : 0.2121
## Balanced Accuracy : 0.4560
##
## 'Positive' Class : Infection
##
# Prediction background for the score plot, over a fixed -20..20 grid.
# NOTE(review): the background uses "centroids.dist" while the class
# predictions below use "max.dist" - confirm the mismatch is intended.
infx_train_background <- background.predict(infx_train_splsda_final,
                                            comp.predicted = 2,
                                            xlim = c(-20, 20),
                                            ylim = c(-20, 20),
                                            dist = "centroids.dist")
# Model metrics for all samples (training and test rows together)
infx_tot <- predict(infx_train_splsda_final,
                    infx_metab_mat,
                    dist = "max.dist")
infx_tot_predict <- infx_tot$class$max.dist[, (infx_train_ncomp + 1)]
infx_tot_union <- union(infx_tot_predict, infx_metab_labs)
# Bug fix: the factors were built as factor(x, infx_tot_union, levels = ...).
# With `levels` named, the positional union vector is matched to `labels`, so
# the classes were renamed in union order. In the recorded run that swapped
# "Infection" and "No Infection" (prevalence printed as 0.75 versus 0.21 in
# the test set). Only `levels` is passed now.
infx_cm <- confusionMatrix(
  table(
    factor(infx_tot_predict, levels = c("Infection", "No Infection")),
    factor(infx_metab_labs, levels = c("Infection", "No Infection"))
  ),
  positive = "Infection"
)
infx_cm
# The printout recorded from the earlier run is not reproduced here because it
# was generated with the swapped class labels; re-knit to refresh it.
# Accuracy (0.7944; 95% CI 0.7054-0.8664) is unaffected, since both factors
# were relabelled identically.
# Additional model measures (sensitivity, specificity, odds ratio, ...) with
# 95% confidence limits, from the predicted-by-actual table of all samples.
infx_epi <- epiR::epi.tests(
  table(infx_tot_predict, infx_metab_labs),
  conf.level = 0.95
)

# 2x2 table (plus totals) relabelled for display on the figure: rows are the
# actual class, columns the predicted class.
infx_confusion_df <-
  as.data.frame(t(infx_epi$tab)) %>%
  rownames_to_column(var = "actual") %>%
  mutate(
    actual = case_when(
      grepl("+", actual, fixed = TRUE) ~ "Actual\nInfection",
      grepl("-", actual, fixed = TRUE) ~ "Actual\nNo Infection",
      TRUE ~ "Total"
    )
  ) %>%
  dplyr::rename(
    "Predicted\nInfection" = "Test +",
    "Predicted\nNo Infection" = "Test -"
  ) %>%
  column_to_rownames(var = "actual")
# Draws Figure 7A on the current graphics device: the infection sPLS-DA score
# plot (components 1 and 2) with prediction background, the confusion table,
# class labels and the accuracy / sensitivity / specificity / odds-ratio
# annotations. Reads the infx_* objects from the global environment.
# The same drawing code used to be written out twice (PDF and screen).
.draw_figure_7a <- function() {
  scores <- infx_train_splsda_final$variates$X
  expl_var <- infx_train_splsda_final$prop_expl_var$X
  max_x <- max(scores[, 1])
  max_y <- max(scores[, 2])
  acc <- infx_cm$overall
  det <- infx_epi$detail
  # Formatters: proportion -> "12.3%", and plain number with one decimal
  pct <- function(v) paste0(formatC(round(v, 3) * 100, digits = 1, format = "f"), "%")
  num <- function(v) formatC(round(v, 3), digits = 1, format = "f")

  plotIndiv(
    infx_train_splsda_final,
    comp = c(1, 2),
    pch = 1,
    ind.names = FALSE,
    legend = FALSE,
    background = infx_train_background,
    col = c("goldenrod", "gray75"),
    star = TRUE,
    point.lwd = 0.5,
    title = NULL,
    size.title = 0.00001,
    style = "graphics",
    legend.title = "Infection Group",
    X.label = paste0("Component 1 (", round(expl_var[1] * 100), "%)"),
    Y.label = paste0("Component 2 (", round(expl_var[2] * 100), "%)")
  )
  # Table and annotation positions are fractions of the largest scores
  addtable2plot(
    x = max_x * 0.4,
    y = max_y * 0.65,
    table = infx_confusion_df,
    bty = "o",
    display.rownames = TRUE,
    hlines = TRUE,
    vlines = TRUE,
    cex = 0.75,
    bg = "white"
  )
  text(x = 6.5, y = 0, quote(bold("Infection")), cex = 1.75, adj = 0, col = "goldenrod")
  text(x = -4.5, y = 4, quote(bold("No Infection")), cex = 1.75, adj = 0, col = "gray70")
  # Four metric lines, top to bottom: ACC, Sens., Spec., OR, each as
  # "estimate [lower, upper]". Rows 3, 4 and 6 of the epi.tests detail table
  # hold sensitivity, specificity and the odds ratio.
  text(
    x = max_x * 0.5,
    y = max_y * c(0.6, 0.55, 0.5, 0.45),
    labels = c(
      paste0("ACC = ", round(acc[1] * 100, 1), "% [",
             round(acc[3] * 100, 1), "%, ", round(acc[4] * 100, 1), "%]"),
      paste0("Sens. = ", pct(det[2][3, ]), " [", pct(det[3][3, ]), ", ", pct(det[4][3, ]), "]"),
      paste0("Spec. = ", pct(det[2][4, ]), " [", pct(det[3][4, ]), ", ", pct(det[4][4, ]), "]"),
      paste0("OR = ", num(det[2][6, ]), " [", num(det[3][6, ]), ", ", num(det[4][6, ]), "]")
    ),
    cex = 0.75,
    adj = 0
  )
  invisible(NULL)
}

# Save Figure 7A; `finally` closes the PDF device even if drawing fails.
{
  pdf(file = "./Results/Figure_7A.pdf", height = 10, width = 10)
  invisible(tryCatch(.draw_figure_7a(), finally = dev.off()))
}
# Same figure on the active device (e.g. the knitted document)
.draw_figure_7a()

# Text and point scaling factors. (The first assignment used to be fused onto
# the closing parenthesis of the last text() call, which is a parse error.)
axis.title.cex <- 3.275
axis.text.cex <- 2.5
plot.text.cex <- 2.5
plot.group.cex <- 2.5
point.cex <- 3
# Combined Figure 6A (Enterococcus, top) + 6C (Enterobacterales, bottom),
# stacked in one PDF. Helpers are kept inside local() so nothing is added to
# the global environment.
invisible(local({
  # Formatters: proportion -> "12.3%", and plain number with one decimal
  pct <- function(v) paste0(formatC(round(v, 3) * 100, digits = 1, format = "f"), "%")
  num <- function(v) formatC(round(v, 3), digits = 1, format = "f")
  # Writes the ACC / Sens. / Spec. / OR lines at (x, y[1..4]), each as
  # "estimate [lower, upper]".
  add_metric_labels <- function(cm, epi, x, y) {
    acc <- cm$overall
    det <- epi$detail
    text(
      x = x,
      y = y,
      labels = c(
        paste0("ACC = ", round(acc[1] * 100, 1), "% [",
               round(acc[3] * 100, 1), "%, ", round(acc[4] * 100, 1), "%]"),
        paste0("Sens. = ", pct(det[2][3, ]), " [", pct(det[3][3, ]), ", ", pct(det[4][3, ]), "]"),
        paste0("Spec. = ", pct(det[2][4, ]), " [", pct(det[3][4, ]), ", ", pct(det[4][4, ]), "]"),
        paste0("OR = ", num(det[2][6, ]), " [", num(det[3][6, ]), ", ", num(det[4][6, ]), "]")
      ),
      cex = 0.75,
      adj = 0
    )
  }

  pdf(file = "./Results/Figure_6AC.pdf", height = 17, width = 11)
  # Two stacked panels; these are the margins in effect when drawing starts
  par(mfrow = c(2, 1), mar = c(5, 5, 2, 1))

  # Figure 6A
  ecoc_scores <- ecoc_doms_train_splsda_final$variates$X
  ecoc_expl <- ecoc_doms_train_splsda_final$prop_expl_var$X
  plotIndiv(
    ecoc_doms_train_splsda_final,
    comp = c(1, 2),
    pch = 1,
    ind.names = FALSE,
    legend = FALSE,
    background = ecoc_doms_train_background,
    col = c("#0C7A3A", "black"),
    star = TRUE,
    point.lwd = 0.5,
    title = NULL,
    size.title = 1e-05,
    style = "graphics",
    legend.title = "Expansion",
    X.label = paste0("Component 1 (", round(ecoc_expl[1] * 100), "%)"),
    Y.label = paste0("Component 2 (", round(ecoc_expl[2] * 100), "%)")
  )
  addtable2plot(
    x = min(ecoc_scores[, 1]) * 1.15,
    y = min(ecoc_scores[, 2]) * 1.05,
    table = ecoc_doms_confusion_df,
    bty = "o",
    display.rownames = TRUE,
    hlines = TRUE,
    vlines = TRUE,
    cex = 0.75,
    bg = "white"
  )
  text(x = 1.5, y = -3.5, quote(bold("Expansion")), cex = 1.75, adj = 0, col = "#0C7A3A")
  text(x = 0.75, y = 3, quote(bold("No Expansion")), cex = 1.75, adj = 0, col = "black")
  add_metric_labels(
    ecoc_doms_cm, ecoc_doms_epi,
    x = max(ecoc_scores[, 1]) * 0.25,
    y = min(ecoc_scores[, 2]) * c(0.825, 0.875, 0.925, 0.975)
  )

  # Figure 6C
  par(mar = c(5, 5, 2, 1))
  ebac_scores <- ebac_doms_train_splsda_final$variates$X
  ebac_expl <- ebac_doms_train_splsda_final$prop_expl_var$X
  plotIndiv(
    ebac_doms_train_splsda_final,
    comp = c(1, 2),
    ylim = c(-5, 16),
    pch = 1,
    ind.names = FALSE,
    legend = FALSE,
    background = ebac_doms_train_background,
    col = c("#FF0000", "black"),
    star = TRUE,
    point.lwd = 0.5,
    title = NULL,
    size.title = 1e-05,
    style = "graphics",
    legend.title = "Expansion",
    X.label = paste0("Component 1 (", round(ebac_expl[1] * 100), "%)"),
    Y.label = paste0("Component 2 (", round(ebac_expl[2] * 100), "%)")
  )
  addtable2plot(
    x = min(ebac_scores[, 1]) * -10,
    y = min(ebac_scores[, 2]) * -3,
    table = ebac_doms_confusion_df,
    bty = "o",
    display.rownames = TRUE,
    hlines = TRUE,
    vlines = TRUE,
    cex = 0.75,
    bg = "white"
  )
  text(x = 1.25, y = 12, quote(bold("Expansion")), cex = 1.75, adj = 0, col = "#FF0000")
  text(x = 0.5, y = -4, quote(bold("No Expansion")), cex = 1.75, adj = 0, col = "black")
  add_metric_labels(
    ebac_doms_cm, ebac_doms_epi,
    x = min(ebac_scores[, 1]) * -12,
    y = min(ebac_scores[, 2]) * c(-2.25, -2.05, -1.85, -1.65)
  )

  dev.off()
}))
# Take absolute value to make diversity group loadings plot narrower
# Overwrites the stored X loadings of the diversity model with their absolute
# values, so every later use of this model object sees unsigned loadings.
diversity_train_splsda_final$loadings$X <- abs(diversity_train_splsda_final$loadings$X)

# Supplemental Figures 3A + 3B combined in one PDF (larger variable names)
{
  pdf(file = "./Results/Supplemental_Figure_3.pdf", height = 8, width = 11)
  par(mfrow = c(1, 3))
  # Supplemental Figure 3A: component 1 loadings
  plotLoadings(
    diversity_train_splsda_final,
    comp = 1,
    contrib = 'max',
    method = 'mean',
    ndisplay = 50,
    legend = FALSE,
    legend.col = c("#EDE342", "#F69A97", "#FF51EB"),
    size.name = 1.1,
    size.title = rel(1)
  )
  # Supplemental Figure 3B: component 2 loadings (legend shown)
  plotLoadings(
    diversity_train_splsda_final,
    comp = 2,
    contrib = 'max',
    method = 'mean',
    ndisplay = 50,
    legend.col = c("#EDE342", "#F69A97", "#FF51EB"),
    size.name = 1.1,
    size.title = rel(1)
  )
  invisible(dev.off())
}

# Same two panels on the active device, with smaller variable names
par(mfrow = c(1, 3))
# Supplemental Figure 3A
plotLoadings(
  diversity_train_splsda_final,
  comp = 1,
  contrib = 'max',
  method = 'mean',
  ndisplay = 50,
  legend = FALSE,
  legend.col = c("#EDE342", "#F69A97", "#FF51EB"),
  size.name = 0.6,
  size.title = rel(1)
)
# Supplemental Figure 3B
plotLoadings(
  diversity_train_splsda_final,
  comp = 2,
  contrib = 'max',
  method = 'mean',
  ndisplay = 50,
  legend.col = c("#EDE342", "#F69A97", "#FF51EB"),
  size.name = 0.6,
  size.title = rel(1)
)
#### Metaphlan + Qual Metabolites ####
# Long table with one row per sample and compound (the largest mvalue is kept
# when there are several), annotated with the cohort (`db`) and Shannon
# diversity, and sorted by cohort and then by Shannon.
heatmap_data_raw <- t_metaphlan %>%
  drop_na(taxid) %>%
  select(sampleID) %>%
  group_by(sampleID) %>%
  dplyr::slice(1) %>%
  mutate(db = ifelse(grepl(sampleID, pattern = "lt"), "Liver Transplant", "Healthy Donor")) %>%
  left_join(metab_qual_anon) %>%
  mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
         compound = str_to_title(compound),
         # str_to_title() lower-cases the "Q"; restore the intended spelling
         compound = recode(compound,
                           Preq1 = "PreQ1")) %>%
  filter(compound %in% heatmap_cmpds$compound | is.na(compound)) %>%
  group_by(sampleID, compound) %>%
  # FALSE spelled out: `F` is an ordinary variable that can be reassigned
  slice_max(mvalue, with_ties = FALSE, n = 1) %>%
  ungroup() %>%
  # Replace the sampleID-derived cohort with the one from metaphlan_df_sumry
  select(-db) %>%
  left_join(metaphlan_df2 %>%
              left_join(metaphlan_df_sumry %>% select(sampleID, db)) %>%
              distinct(sampleID, db), by = "sampleID") %>%
  group_by(sampleID, compound) %>%
  # De-duplicate again in case the cohort join multiplied rows
  slice_max(mvalue, with_ties = FALSE, n = 1) %>%
  left_join(alpha_shannon) %>%
  group_by(db) %>%
  arrange(db, Shannon) %>%
  ungroup()
# Compounds selected on component 2 of the diversity sPLS-DA model
diversity_train_splsda_final_comp2 <-
  mixOmics::selectVar(diversity_train_splsda_final, comp = 2)$name

# Bar plot of those compounds: one facet row per compound, one facet column per
# diversity group, samples ordered by Shannon diversity within each panel.
gg_diversity_comp2 <- metaphlan_df2 %>%
  left_join(select(metaphlan_df_sumry, sampleID, diversity_group)) %>%
  select(sampleID, diversity_group) %>%
  distinct(sampleID, diversity_group) %>%
  left_join(heatmap_data_raw) %>%
  filter(compound %in% diversity_train_splsda_final_comp2 | is.na(compound)) %>%
  mutate(
    diversity_group = factor(
      diversity_group,
      levels = c("Low Diversity", "Medium Diversity", "High Diversity", "Healthy Donor")
    )
  ) %>%
  # Wide then long again so every sample has a row for every compound
  # (missing values filled with NA, just for plotting)
  pivot_wider(
    id_cols = c(sampleID, diversity_group, patientID, db, Shannon),
    names_from = "compound",
    values_from = "mvalue",
    values_fill = NA
  ) %>%
  pivot_longer(
    -c(sampleID, diversity_group, patientID, db, Shannon),
    names_to = "compound",
    values_to = "mvalue"
  ) %>%
  # Names outside the selected compounds become NA as factor values and are
  # removed by the filter below
  mutate(compound = factor(compound, levels = diversity_train_splsda_final_comp2)) %>%
  # filter(compound == "Omega-Muricholic Acid") %>% arrange(desc(mvalue))
  filter(compound != "NA") %>%
  group_by(sampleID, compound) %>%
  dplyr::slice(1) %>%
  ggplot(aes(x = reorder(sampleID, Shannon), y = mvalue, fill = diversity_group)) +
  geom_col() +
  facet_grid(compound ~ diversity_group, scales = "free") +
  scale_fill_manual(values = diversity_group_colors) +
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.005))) +
  labs(x = "", y = "Normalized Peak Area") +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = eb(),
    axis.ticks.x = eb(),
    axis.title.y = et(color = "black", size = 14),
    axis.text.y = et(color = "black", size = 12),
    # strip.text.x= et(angle = 0, size = 14, color = "black"),
    strip.text.x = eb(),
    strip.text.y = et(angle = 0, size = 14, color = "black", hjust = 0),
    strip.background = eb(),
    panel.spacing = unit(0.5, "lines"),
    panel.grid.minor = eb(),
    plot.margin = margin(t = 5, r = 5, b = 0, l = 5)
  )

# Stack the metaphlan plot above the compound panels (height ratio 1:4) and
# write the result to PDF.
pdf(
  file = "./Results/Metahphlan_Medium_Diversity_Compounds.pdf",
  width = 20,
  height = 16,
  onefile = FALSE
)
gg.stack(gg_metaphlan, gg_diversity_comp2, heights = c(1, 4))
dev.off()
## quartz_off_screen
## 2
# Replace the X loadings with their absolute values so that the loadings
# plots below are narrower.
# NOTE(review): this overwrites the loadings stored in the fitted model object
# itself; confirm nothing downstream needs the signed loadings.
ecoc_doms_train_splsda_final$loadings$X <- abs(
  ecoc_doms_train_splsda_final$loadings$X
)

# Supplemental Figure 7AB
{
  pdf(
    file = "./Results/Supplemental_Figure_7AB.pdf",
    height = 8,
    width = 11
  )
  par(mfrow = c(1, 3))
  # Component 1, legend switched off.
  plotLoadings(
    ecoc_doms_train_splsda_final,
    contrib = "max",
    method = "mean",
    comp = 1,
    legend = FALSE,
    legend.col = c("#0C7A3A", "black"),
    size.name = 1.1,
    size.title = rel(1),
    ndisplay = 50
  )
  # Component 2, default legend.
  plotLoadings(
    ecoc_doms_train_splsda_final,
    contrib = "max",
    method = "mean",
    comp = 2,
    legend.col = c("#0C7A3A", "black"),
    size.name = 1.1,
    size.title = rel(1),
    ndisplay = 50
  )
  invisible(dev.off())
}
# Same two loadings plots again, drawn on the currently active graphics device
# with smaller variable labels (size.name = 0.6).
par(mfrow = c(1, 3))
plotLoadings(
  ecoc_doms_train_splsda_final,
  contrib = "max",
  method = "mean",
  comp = 1,
  legend = FALSE,
  legend.col = c("#0C7A3A", "black"),
  size.name = 0.6,
  size.title = rel(1),
  ndisplay = 50
)
plotLoadings(
  ecoc_doms_train_splsda_final,
  contrib = "max",
  method = "mean",
  comp = 2,
  legend.col = c("#0C7A3A", "black"),
  size.name = 0.6,
  size.title = rel(1),
  ndisplay = 50
)

# Take the absolute value of the loadings to make the loadings plot
# narrower
# NOTE(review): this overwrites the X loadings stored in the fitted model
# object itself; confirm nothing downstream needs the signed loadings.
ebac_doms_train_splsda_final$loadings$X <- abs(
  ebac_doms_train_splsda_final$loadings$X
)

# Supplemental Figure 7CD
{
  pdf(
    file = "./Results/Supplemental_Figure_7CD.pdf",
    height = 8,
    width = 11
  )
  par(mfrow = c(1, 3))
  # Component 1, legend switched off.
  plotLoadings(
    ebac_doms_train_splsda_final,
    contrib = "max",
    method = "mean",
    comp = 1,
    legend = FALSE,
    legend.col = c("#FF0000", "black"),
    size.name = 1.1,
    size.title = rel(1),
    ndisplay = 50
  )
  # Component 2, default legend.
  plotLoadings(
    ebac_doms_train_splsda_final,
    contrib = "max",
    method = "mean",
    comp = 2,
    legend.col = c("#FF0000", "black"),
    size.name = 1.1,
    size.title = rel(1),
    ndisplay = 50
  )
  invisible(dev.off())
}
# Same two loadings plots again, drawn on the currently active graphics device
# with smaller variable labels (size.name = 0.6).
par(mfrow = c(1, 3))
plotLoadings(
  ebac_doms_train_splsda_final,
  contrib = "max",
  method = "mean",
  comp = 1,
  legend = FALSE,
  legend.col = c("#FF0000", "black"),
  size.name = 0.6,
  size.title = rel(1),
  ndisplay = 50
)
plotLoadings(
  ebac_doms_train_splsda_final,
  contrib = "max",
  method = "mean",
  comp = 2,
  legend.col = c("#FF0000", "black"),
  size.name = 0.6,
  size.title = rel(1),
  ndisplay = 50
)

# Take the absolute value of the loadings to make the loadings plot
# narrower
# NOTE(review): this overwrites the X loadings stored in the fitted model
# object itself; confirm nothing downstream needs the signed loadings.
infx_train_splsda_final$loadings$X <- abs(
  infx_train_splsda_final$loadings$X
)

{
  pdf(
    file = "./Results/Supplemental_Figure_8.pdf",
    height = 8,
    width = 11
  )
  par(mfrow = c(1, 3))
  # Supplemental Figure 8A
  plotLoadings(
    infx_train_splsda_final,
    contrib = "max",
    method = "mean",
    comp = 1,
    legend = FALSE,
    legend.col = c("goldenrod", "gray75"),
    size.name = 1.1,
    size.title = rel(1),
    ndisplay = 50
  )
  # Supplemental Figure 8B
  plotLoadings(
    infx_train_splsda_final,
    contrib = "max",
    method = "mean",
    comp = 2,
    legend.col = c("goldenrod", "gray75"),
    size.name = 1.1,
    size.title = rel(1),
    ndisplay = 50
  )
  invisible(dev.off())
}
# Same two loadings plots again, drawn on the currently active graphics device
# with smaller variable labels (size.name = 0.6).
par(mfrow = c(1, 3))
# Supplemental Figure 8A
plotLoadings(
  infx_train_splsda_final,
  contrib = "max",
  method = "mean",
  comp = 1,
  legend = FALSE,
  legend.col = c("goldenrod", "gray75"),
  size.name = 0.6,
  size.title = rel(1),
  ndisplay = 50
)
# Supplemental Figure 8B
plotLoadings(
  infx_train_splsda_final,
  contrib = "max",
  method = "mean",
  comp = 2,
  legend.col = c("goldenrod", "gray75"),
  size.name = 0.6,
  size.title = rel(1),
  ndisplay = 50
)

# Flow chart of cohort exclusions. The assignment must start on its own line:
# fused to the closing parenthesis of the call above it does not parse.
# incl_counts holds the total followed by one count per inclusion label.
flow_chart <- flow_exclusions(
  incl_counts = c(158, 130, 107, 25),
  total_label = "Total Patients Enrolled",
  incl_labels = c(
    "Patients w/ Transplant",
    "Patients Included",
    "Patients w/ Bacterial Infection"
  ),
  excl_labels = c(
    "No Transplant",
    "No Sample In Study Period\nDay -7 to +30",
    "Patients w/o Bacterial Infection"
  ),
  show_count = TRUE
)
flow_chart
## Vector of variables to summarize
## Variables summarised in the demographics tables, in the order they are
## handed to CreateTableOne().
demo_vars <- c(
  "race",
  "sex",
  "age",
  "meld_transplant",
  "Alcoholic Hepatitis",
  "Alcoholic Cirrhosis",
  "NAFLD/NASH",
  "Primary Sclerosing Cholangitis",
  "Acute Viral Hepatitis",
  "Chronic Hepatitis B",
  "Chronic Hepatitis C",
  "Autoimmune",
  "Wilson's Disease",
  "Alpha-1 Antitrypsin",
  "Hemachromatosis",
  "Drug Induced Liver Injury or Toxin",
  "Budd Chiari",
  "Cryptogenic",
  "Malignancy",
  "Other",
  "Dialysis",
  "Pressers",
  "Mechanical Ventilation"
)
## Categorical variables that need transformation: every entry of demo_vars
## except the two continuous ones, in the same order.
demo_cats <- setdiff(demo_vars, c("age", "meld_transplant"))
# Demographics table stratified by any_infection; NA is counted as a level
# and "wilcox.test" is supplied as the non-normal test.
tab1_1 <- CreateTableOne(
  vars = demo_vars,
  strata = "any_infection",
  data = demo,
  factorVars = demo_cats,
  includeNA = TRUE,
  testNonNormal = "wilcox.test"
)
# Age is potentially skewed: it is declared as non-normal when the table is
# printed further down.
summary(tab1_1)
##
## ### Summary of continuous variables ###
##
## any_infection: 0
## n miss p.miss mean sd median p25 p75 min max skew kurt
## age 82 0 0 51 17 55 40 63 2 77 -1.00 0.8
## meld_transplant 82 0 0 26 10 28 19 33 6 49 -0.04 -0.7
## ------------------------------------------------------------
## any_infection: 1
## n miss p.miss mean sd median p25 p75 min max skew kurt
## age 25 0 0 56 15 59 46 68 22 73 -0.9 0.002
## meld_transplant 25 0 0 27 10 30 19 33 11 42 -0.3 -1.057
##
## p-values
## pNormal pNonNormal
## age 0.2072738 0.1765252
## meld_transplant 0.6624494 0.6085202
##
## Standardize mean differences
## 1 vs 2
## age 0.2976093
## meld_transplant 0.1025141
##
## =======================================================================================
##
## ### Summary of categorical variables ###
##
## any_infection: 0
## var n miss p.miss
## race 82 0 0.0
##
##
##
##
##
##
##
## sex 82 0 0.0
##
##
## Alcoholic Hepatitis 82 0 0.0
##
##
## Alcoholic Cirrhosis 82 0 0.0
##
##
## NAFLD/NASH 82 0 0.0
##
##
## Primary Sclerosing Cholangitis 82 0 0.0
##
##
## Acute Viral Hepatitis 82 0 0.0
##
##
## Chronic Hepatitis B 82 0 0.0
##
##
## Chronic Hepatitis C 82 0 0.0
##
##
## Autoimmune 82 0 0.0
##
##
## Wilson's Disease 82 0 0.0
##
##
## Alpha-1 Antitrypsin 82 0 0.0
##
## Hemachromatosis 82 0 0.0
##
##
## Drug Induced Liver Injury or Toxin 82 0 0.0
##
##
## Budd Chiari 82 0 0.0
##
## Cryptogenic 82 0 0.0
##
##
## Malignancy 82 0 0.0
##
##
## Other 82 0 0.0
##
##
## Dialysis 82 0 0.0
##
##
## Pressers 82 0 0.0
##
##
## Mechanical Ventilation 82 0 0.0
##
##
## level freq percent cum.percent
## American Indian or Alaska Native 1 1.2 1.2
## Asian/Mideast Indian 6 7.3 8.5
## Black/African-American 9 11.0 19.5
## More than one Race 8 9.8 29.3
## Patient Declined 4 4.9 34.1
## Unknown 0 0.0 34.1
## White 54 65.9 100.0
##
## Female 38 46.3 46.3
## Male 44 53.7 100.0
##
## 0 76 92.7 92.7
## 1 6 7.3 100.0
##
## 0 42 51.2 51.2
## 1 40 48.8 100.0
##
## 0 72 87.8 87.8
## 1 10 12.2 100.0
##
## 0 76 92.7 92.7
## 1 6 7.3 100.0
##
## 0 78 95.1 95.1
## 1 4 4.9 100.0
##
## 0 82 100.0 100.0
## 1 0 0.0 100.0
##
## 0 78 95.1 95.1
## 1 4 4.9 100.0
##
## 0 77 93.9 93.9
## 1 5 6.1 100.0
##
## 0 80 97.6 97.6
## 1 2 2.4 100.0
##
## 0 82 100.0 100.0
##
## 0 82 100.0 100.0
## 1 0 0.0 100.0
##
## 0 81 98.8 98.8
## 1 1 1.2 100.0
##
## 0 82 100.0 100.0
##
## 0 78 95.1 95.1
## 1 4 4.9 100.0
##
## 0 65 79.3 79.3
## 1 17 20.7 100.0
##
## 0 74 90.2 90.2
## 1 8 9.8 100.0
##
## 0 60 73.2 73.2
## 1 22 26.8 100.0
##
## 0 76 92.7 92.7
## 1 6 7.3 100.0
##
## 0 77 93.9 93.9
## 1 5 6.1 100.0
##
## ------------------------------------------------------------
## any_infection: 1
## var n miss p.miss
## race 25 0 0.0
##
##
##
##
##
##
##
## sex 25 0 0.0
##
##
## Alcoholic Hepatitis 25 0 0.0
##
##
## Alcoholic Cirrhosis 25 0 0.0
##
##
## NAFLD/NASH 25 0 0.0
##
##
## Primary Sclerosing Cholangitis 25 0 0.0
##
##
## Acute Viral Hepatitis 25 0 0.0
##
##
## Chronic Hepatitis B 25 0 0.0
##
##
## Chronic Hepatitis C 25 0 0.0
##
##
## Autoimmune 25 0 0.0
##
##
## Wilson's Disease 25 0 0.0
##
##
## Alpha-1 Antitrypsin 25 0 0.0
##
## Hemachromatosis 25 0 0.0
##
##
## Drug Induced Liver Injury or Toxin 25 0 0.0
##
##
## Budd Chiari 25 0 0.0
##
## Cryptogenic 25 0 0.0
##
##
## Malignancy 25 0 0.0
##
##
## Other 25 0 0.0
##
##
## Dialysis 25 0 0.0
##
##
## Pressers 25 0 0.0
##
##
## Mechanical Ventilation 25 0 0.0
##
##
## level freq percent cum.percent
## American Indian or Alaska Native 0 0.0 0.0
## Asian/Mideast Indian 2 8.0 8.0
## Black/African-American 2 8.0 16.0
## More than one Race 2 8.0 24.0
## Patient Declined 1 4.0 28.0
## Unknown 2 8.0 36.0
## White 16 64.0 100.0
##
## Female 9 36.0 36.0
## Male 16 64.0 100.0
##
## 0 23 92.0 92.0
## 1 2 8.0 100.0
##
## 0 17 68.0 68.0
## 1 8 32.0 100.0
##
## 0 19 76.0 76.0
## 1 6 24.0 100.0
##
## 0 25 100.0 100.0
## 1 0 0.0 100.0
##
## 0 25 100.0 100.0
## 1 0 0.0 100.0
##
## 0 24 96.0 96.0
## 1 1 4.0 100.0
##
## 0 25 100.0 100.0
## 1 0 0.0 100.0
##
## 0 25 100.0 100.0
## 1 0 0.0 100.0
##
## 0 24 96.0 96.0
## 1 1 4.0 100.0
##
## 0 25 100.0 100.0
##
## 0 24 96.0 96.0
## 1 1 4.0 100.0
##
## 0 25 100.0 100.0
## 1 0 0.0 100.0
##
## 0 25 100.0 100.0
##
## 0 24 96.0 96.0
## 1 1 4.0 100.0
##
## 0 19 76.0 76.0
## 1 6 24.0 100.0
##
## 0 20 80.0 80.0
## 1 5 20.0 100.0
##
## 0 16 64.0 64.0
## 1 9 36.0 100.0
##
## 0 20 80.0 80.0
## 1 5 20.0 100.0
##
## 0 23 92.0 92.0
## 1 2 8.0 100.0
##
##
## p-values
## pApprox pExact
## race 0.3074905 0.4275466
## sex 0.4953032 0.4904875
## Alcoholic Hepatitis 1.0000000 1.0000000
## Alcoholic Cirrhosis 0.2123469 0.1713637
## NAFLD/NASH 0.2590605 0.1979409
## Primary Sclerosing Cholangitis 0.3704754 0.3322151
## Acute Viral Hepatitis 0.6007080 0.5709809
## Chronic Hepatitis B 0.5271112 0.2336449
## Chronic Hepatitis C 0.6007080 0.5709809
## Autoimmune 0.4694777 0.5886832
## Wilson's Disease 1.0000000 0.5538202
## Alpha-1 Antitrypsin NA NA
## Hemachromatosis 0.5271112 0.2336449
## Drug Induced Liver Injury or Toxin 1.0000000 1.0000000
## Budd Chiari NA NA
## Cryptogenic 1.0000000 1.0000000
## Malignancy 0.9440592 0.7828238
## Other 0.3063991 0.1774710
## Dialysis 0.5266900 0.4513768
## Pressers 0.1465607 0.1238754
## Mechanical Ventilation 1.0000000 0.6642869
##
## Standardize mean differences
## 1 vs 2
## race 0.45891730
## sex 0.21130091
## Alcoholic Hepatitis 0.02568258
## Alcoholic Cirrhosis 0.34709743
## NAFLD/NASH 0.31029007
## Primary Sclerosing Cholangitis 0.39735971
## Acute Viral Hepatitis 0.32025631
## Chronic Hepatitis B 0.28867513
## Chronic Hepatitis C 0.32025631
## Autoimmune 0.36037499
## Wilson's Disease 0.08851811
## Alpha-1 Antitrypsin 0.00000000
## Hemachromatosis 0.28867513
## Drug Induced Liver Injury or Toxin 0.15713484
## Budd Chiari 0.00000000
## Cryptogenic 0.04264158
## Malignancy 0.07849392
## Other 0.29088216
## Dialysis 0.19854168
## Pressers 0.37578690
## Mechanical Ventilation 0.07437488
# Continuous variables to report as median [IQR] with the non-normal test.
tableone_skewed <- c("age", "meld_transplant")
# Keep the value returned by print() so the formatted table can be exported.
tab1_2 <- print(
  tab1_1,
  nonnormal = tableone_skewed,
  formatOptions = list(big.mark = ",")
)
## Stratified by any_infection
## 0
## n 82
## race (%)
## American Indian or Alaska Native 1 ( 1.2)
## Asian/Mideast Indian 6 ( 7.3)
## Black/African-American 9 ( 11.0)
## More than one Race 8 ( 9.8)
## Patient Declined 4 ( 4.9)
## Unknown 0 ( 0.0)
## White 54 ( 65.9)
## sex = Male (%) 44 ( 53.7)
## age (median [IQR]) 55.00 [39.50, 63.00]
## meld_transplant (median [IQR]) 28.00 [19.00, 33.00]
## Alcoholic Hepatitis = 1 (%) 6 ( 7.3)
## Alcoholic Cirrhosis = 1 (%) 40 ( 48.8)
## NAFLD/NASH = 1 (%) 10 ( 12.2)
## Primary Sclerosing Cholangitis = 1 (%) 6 ( 7.3)
## Acute Viral Hepatitis = 1 (%) 4 ( 4.9)
## Chronic Hepatitis B = 1 (%) 0 ( 0.0)
## Chronic Hepatitis C = 1 (%) 4 ( 4.9)
## Autoimmune = 1 (%) 5 ( 6.1)
## Wilson's Disease = 1 (%) 2 ( 2.4)
## Alpha-1 Antitrypsin = 0 (%) 82 (100.0)
## Hemachromatosis = 1 (%) 0 ( 0.0)
## Drug Induced Liver Injury or Toxin = 1 (%) 1 ( 1.2)
## Budd Chiari = 0 (%) 82 (100.0)
## Cryptogenic = 1 (%) 4 ( 4.9)
## Malignancy = 1 (%) 17 ( 20.7)
## Other = 1 (%) 8 ( 9.8)
## Dialysis = 1 (%) 22 ( 26.8)
## Pressers = 1 (%) 6 ( 7.3)
## Mechanical Ventilation = 1 (%) 5 ( 6.1)
## Stratified by any_infection
## 1 p
## n 25
## race (%) 0.307
## American Indian or Alaska Native 0 ( 0.0)
## Asian/Mideast Indian 2 ( 8.0)
## Black/African-American 2 ( 8.0)
## More than one Race 2 ( 8.0)
## Patient Declined 1 ( 4.0)
## Unknown 2 ( 8.0)
## White 16 ( 64.0)
## sex = Male (%) 16 ( 64.0) 0.495
## age (median [IQR]) 59.00 [46.00, 68.00] 0.177
## meld_transplant (median [IQR]) 30.00 [19.00, 33.00] 0.609
## Alcoholic Hepatitis = 1 (%) 2 ( 8.0) 1.000
## Alcoholic Cirrhosis = 1 (%) 8 ( 32.0) 0.212
## NAFLD/NASH = 1 (%) 6 ( 24.0) 0.259
## Primary Sclerosing Cholangitis = 1 (%) 0 ( 0.0) 0.370
## Acute Viral Hepatitis = 1 (%) 0 ( 0.0) 0.601
## Chronic Hepatitis B = 1 (%) 1 ( 4.0) 0.527
## Chronic Hepatitis C = 1 (%) 0 ( 0.0) 0.601
## Autoimmune = 1 (%) 0 ( 0.0) 0.469
## Wilson's Disease = 1 (%) 1 ( 4.0) 1.000
## Alpha-1 Antitrypsin = 0 (%) 25 (100.0) NA
## Hemachromatosis = 1 (%) 1 ( 4.0) 0.527
## Drug Induced Liver Injury or Toxin = 1 (%) 0 ( 0.0) 1.000
## Budd Chiari = 0 (%) 25 (100.0) NA
## Cryptogenic = 1 (%) 1 ( 4.0) 1.000
## Malignancy = 1 (%) 6 ( 24.0) 0.944
## Other = 1 (%) 5 ( 20.0) 0.306
## Dialysis = 1 (%) 9 ( 36.0) 0.527
## Pressers = 1 (%) 5 ( 20.0) 0.147
## Mechanical Ventilation = 1 (%) 2 ( 8.0) 1.000
## Stratified by any_infection
## test
## n
## race (%)
## American Indian or Alaska Native
## Asian/Mideast Indian
## Black/African-American
## More than one Race
## Patient Declined
## Unknown
## White
## sex = Male (%)
## age (median [IQR]) nonnorm
## meld_transplant (median [IQR]) nonnorm
## Alcoholic Hepatitis = 1 (%)
## Alcoholic Cirrhosis = 1 (%)
## NAFLD/NASH = 1 (%)
## Primary Sclerosing Cholangitis = 1 (%)
## Acute Viral Hepatitis = 1 (%)
## Chronic Hepatitis B = 1 (%)
## Chronic Hepatitis C = 1 (%)
## Autoimmune = 1 (%)
## Wilson's Disease = 1 (%)
## Alpha-1 Antitrypsin = 0 (%)
## Hemachromatosis = 1 (%)
## Drug Induced Liver Injury or Toxin = 1 (%)
## Budd Chiari = 0 (%)
## Cryptogenic = 1 (%)
## Malignancy = 1 (%)
## Other = 1 (%)
## Dialysis = 1 (%)
## Pressers = 1 (%)
## Mechanical Ventilation = 1 (%)
# Saving then reading in the same data allows for an easy way to adjust
# p-values, since it loads the object as a data frame.
write.csv(
  tab1_2,
  "./Results/Demo_Table_1.csv",
  row.names = TRUE
)

# Need to adjust p-values and arrange properly, hence the multiple data
# frames below.
tab1_2_padjust1 <- read.csv("./Results/Demo_Table_1.csv") %>%
  dplyr::rename(
    ` ` = X,
    `No Infection` = X0,
    `Bacterial Infection` = X1
  )

# Row labels as a factor whose levels follow the order of the table rows.
tab1_2_padjust2 <- tab1_2_padjust1 %>%
  mutate(
    ` ` = factor(` `, levels = tab1_2_padjust1$` `)
  )

tab1_2_padjust3 <- tab1_2_padjust1 %>%
  # Rows that carry a p-value but no test label are tagged "chi.sq".
  mutate(
    test = ifelse(!is.na(p) & test == "", "chi.sq", test)
  ) %>%
  # Benjamini-Hochberg adjustment, done separately for each test label.
  group_by(test) %>%
  rstatix::adjust_pvalue(p.col = "p", method = "BH") %>%
  ungroup() %>%
  # Sort the rows by the table's row-label order.
  mutate(
    ` ` = factor(` `, tab1_2_padjust2$` `)
  ) %>%
  arrange(` `) %>%
  # Show missing p-values as empty strings in the exported table.
  mutate(
    p = ifelse(is.na(p), "", p),
    p.adj = ifelse(is.na(p.adj), "", p.adj)
  )

# Write out the table with the adjusted p-values appended.
write.csv(
  tab1_2_padjust3,
  "./Results/Demo_Table_1_padjust.csv",
  row.names = FALSE
)
#### Stratify by diversity ####
# Attach each patient's diversity group to the demographics. patientID is
# taken from sampleID with the pattern "lt-[0-9]+"; samples without such an
# ID are dropped before the join.
demo_div <- demo %>%
  left_join(
    metaphlan_df_sumry %>%
      mutate(
        patientID = str_extract(string = sampleID, pattern = "lt-[0-9]+")
      ) %>%
      distinct(patientID, diversity_group) %>%
      drop_na(patientID) %>%
      droplevels()
  )

# Same demographics table as above, stratified by diversity_group;
# "kruskal.test" is supplied as the non-normal test.
div_tab1_1 <- CreateTableOne(
  vars = demo_vars,
  strata = "diversity_group",
  data = demo_div,
  factorVars = demo_cats,
  includeNA = TRUE,
  testNonNormal = "kruskal.test"
)
# Age is potentially skewed: it is declared as non-normal when the table is
# printed further down.
summary(div_tab1_1)
##
## ### Summary of continuous variables ###
##
## diversity_group: Low Diversity
## n miss p.miss mean sd median p25 p75 min max skew kurt
## age 40 0 0 50 16 54 38 62 2 77 -0.8 0.8
## meld_transplant 40 0 0 31 8 31 29 36 7 44 -0.9 1.1
## ------------------------------------------------------------
## diversity_group: Medium Diversity
## n miss p.miss mean sd median p25 p75 min max skew kurt
## age 40 0 0 54 16 58 47 65 2 72 -1.5 2.2
## meld_transplant 40 0 0 27 10 24 20 33 9 49 0.3 -0.3
## ------------------------------------------------------------
## diversity_group: High Diversity
## n miss p.miss mean sd median p25 p75 min max skew kurt
## age 27 0 0 52 17 60 39 68 12 73 -0.7 -0.4
## meld_transplant 27 0 0 20 11 18 12 28 6 46 0.8 -0.1
##
## p-values
## pNormal pNonNormal
## age 5.654067e-01 0.3575030890
## meld_transplant 6.927777e-05 0.0001636809
##
## Standardize mean differences
## average 1 vs 2 1 vs 3 2 vs 3
## age 0.1577328 0.2387530 0.1575451 0.07690025
## meld_transplant 0.7501053 0.4792525 1.1430397 0.62802360
##
## =======================================================================================
##
## ### Summary of categorical variables ###
##
## diversity_group: Low Diversity
## var n miss p.miss
## race 40 0 0.0
##
##
##
##
##
##
##
## sex 40 0 0.0
##
##
## Alcoholic Hepatitis 40 0 0.0
##
##
## Alcoholic Cirrhosis 40 0 0.0
##
##
## NAFLD/NASH 40 0 0.0
##
##
## Primary Sclerosing Cholangitis 40 0 0.0
##
##
## Acute Viral Hepatitis 40 0 0.0
##
##
## Chronic Hepatitis B 40 0 0.0
##
##
## Chronic Hepatitis C 40 0 0.0
##
##
## Autoimmune 40 0 0.0
##
##
## Wilson's Disease 40 0 0.0
##
##
## Alpha-1 Antitrypsin 40 0 0.0
##
## Hemachromatosis 40 0 0.0
##
##
## Drug Induced Liver Injury or Toxin 40 0 0.0
##
##
## Budd Chiari 40 0 0.0
##
## Cryptogenic 40 0 0.0
##
##
## Malignancy 40 0 0.0
##
##
## Other 40 0 0.0
##
##
## Dialysis 40 0 0.0
##
##
## Pressers 40 0 0.0
##
##
## Mechanical Ventilation 40 0 0.0
##
##
## level freq percent cum.percent
## American Indian or Alaska Native 0 0.0 0.0
## Asian/Mideast Indian 3 7.5 7.5
## Black/African-American 4 10.0 17.5
## More than one Race 2 5.0 22.5
## Patient Declined 1 2.5 25.0
## Unknown 0 0.0 25.0
## White 30 75.0 100.0
##
## Female 20 50.0 50.0
## Male 20 50.0 100.0
##
## 0 35 87.5 87.5
## 1 5 12.5 100.0
##
## 0 20 50.0 50.0
## 1 20 50.0 100.0
##
## 0 35 87.5 87.5
## 1 5 12.5 100.0
##
## 0 39 97.5 97.5
## 1 1 2.5 100.0
##
## 0 38 95.0 95.0
## 1 2 5.0 100.0
##
## 0 39 97.5 97.5
## 1 1 2.5 100.0
##
## 0 40 100.0 100.0
## 1 0 0.0 100.0
##
## 0 37 92.5 92.5
## 1 3 7.5 100.0
##
## 0 38 95.0 95.0
## 1 2 5.0 100.0
##
## 0 40 100.0 100.0
##
## 0 39 97.5 97.5
## 1 1 2.5 100.0
##
## 0 39 97.5 97.5
## 1 1 2.5 100.0
##
## 0 40 100.0 100.0
##
## 0 39 97.5 97.5
## 1 1 2.5 100.0
##
## 0 37 92.5 92.5
## 1 3 7.5 100.0
##
## 0 35 87.5 87.5
## 1 5 12.5 100.0
##
## 0 23 57.5 57.5
## 1 17 42.5 100.0
##
## 0 32 80.0 80.0
## 1 8 20.0 100.0
##
## 0 35 87.5 87.5
## 1 5 12.5 100.0
##
## ------------------------------------------------------------
## diversity_group: Medium Diversity
## var n miss p.miss
## race 40 0 0.0
##
##
##
##
##
##
##
## sex 40 0 0.0
##
##
## Alcoholic Hepatitis 40 0 0.0
##
##
## Alcoholic Cirrhosis 40 0 0.0
##
##
## NAFLD/NASH 40 0 0.0
##
##
## Primary Sclerosing Cholangitis 40 0 0.0
##
##
## Acute Viral Hepatitis 40 0 0.0
##
##
## Chronic Hepatitis B 40 0 0.0
##
##
## Chronic Hepatitis C 40 0 0.0
##
##
## Autoimmune 40 0 0.0
##
##
## Wilson's Disease 40 0 0.0
##
##
## Alpha-1 Antitrypsin 40 0 0.0
##
## Hemachromatosis 40 0 0.0
##
##
## Drug Induced Liver Injury or Toxin 40 0 0.0
##
##
## Budd Chiari 40 0 0.0
##
## Cryptogenic 40 0 0.0
##
##
## Malignancy 40 0 0.0
##
##
## Other 40 0 0.0
##
##
## Dialysis 40 0 0.0
##
##
## Pressers 40 0 0.0
##
##
## Mechanical Ventilation 40 0 0.0
##
##
## level freq percent cum.percent
## American Indian or Alaska Native 1 2.5 2.5
## Asian/Mideast Indian 2 5.0 7.5
## Black/African-American 5 12.5 20.0
## More than one Race 6 15.0 35.0
## Patient Declined 1 2.5 37.5
## Unknown 0 0.0 37.5
## White 25 62.5 100.0
##
## Female 18 45.0 45.0
## Male 22 55.0 100.0
##
## 0 39 97.5 97.5
## 1 1 2.5 100.0
##
## 0 17 42.5 42.5
## 1 23 57.5 100.0
##
## 0 34 85.0 85.0
## 1 6 15.0 100.0
##
## 0 38 95.0 95.0
## 1 2 5.0 100.0
##
## 0 39 97.5 97.5
## 1 1 2.5 100.0
##
## 0 40 100.0 100.0
## 1 0 0.0 100.0
##
## 0 40 100.0 100.0
## 1 0 0.0 100.0
##
## 0 40 100.0 100.0
## 1 0 0.0 100.0
##
## 0 40 100.0 100.0
## 1 0 0.0 100.0
##
## 0 40 100.0 100.0
##
## 0 40 100.0 100.0
## 1 0 0.0 100.0
##
## 0 40 100.0 100.0
## 1 0 0.0 100.0
##
## 0 40 100.0 100.0
##
## 0 36 90.0 90.0
## 1 4 10.0 100.0
##
## 0 32 80.0 80.0
## 1 8 20.0 100.0
##
## 0 37 92.5 92.5
## 1 3 7.5 100.0
##
## 0 29 72.5 72.5
## 1 11 27.5 100.0
##
## 0 38 95.0 95.0
## 1 2 5.0 100.0
##
## 0 39 97.5 97.5
## 1 1 2.5 100.0
##
## ------------------------------------------------------------
## diversity_group: High Diversity
## var n miss p.miss
## race 27 0 0.0
##
##
##
##
##
##
##
## sex 27 0 0.0
##
##
## Alcoholic Hepatitis 27 0 0.0
##
##
## Alcoholic Cirrhosis 27 0 0.0
##
##
## NAFLD/NASH 27 0 0.0
##
##
## Primary Sclerosing Cholangitis 27 0 0.0
##
##
## Acute Viral Hepatitis 27 0 0.0
##
##
## Chronic Hepatitis B 27 0 0.0
##
##
## Chronic Hepatitis C 27 0 0.0
##
##
## Autoimmune 27 0 0.0
##
##
## Wilson's Disease 27 0 0.0
##
##
## Alpha-1 Antitrypsin 27 0 0.0
##
## Hemachromatosis 27 0 0.0
##
##
## Drug Induced Liver Injury or Toxin 27 0 0.0
##
##
## Budd Chiari 27 0 0.0
##
## Cryptogenic 27 0 0.0
##
##
## Malignancy 27 0 0.0
##
##
## Other 27 0 0.0
##
##
## Dialysis 27 0 0.0
##
##
## Pressers 27 0 0.0
##
##
## Mechanical Ventilation 27 0 0.0
##
##
## level freq percent cum.percent
## American Indian or Alaska Native 0 0.0 0.0
## Asian/Mideast Indian 3 11.1 11.1
## Black/African-American 2 7.4 18.5
## More than one Race 2 7.4 25.9
## Patient Declined 3 11.1 37.0
## Unknown 2 7.4 44.4
## White 15 55.6 100.0
##
## Female 9 33.3 33.3
## Male 18 66.7 100.0
##
## 0 25 92.6 92.6
## 1 2 7.4 100.0
##
## 0 22 81.5 81.5
## 1 5 18.5 100.0
##
## 0 22 81.5 81.5
## 1 5 18.5 100.0
##
## 0 24 88.9 88.9
## 1 3 11.1 100.0
##
## 0 26 96.3 96.3
## 1 1 3.7 100.0
##
## 0 27 100.0 100.0
## 1 0 0.0 100.0
##
## 0 23 85.2 85.2
## 1 4 14.8 100.0
##
## 0 25 92.6 92.6
## 1 2 7.4 100.0
##
## 0 26 96.3 96.3
## 1 1 3.7 100.0
##
## 0 27 100.0 100.0
##
## 0 27 100.0 100.0
## 1 0 0.0 100.0
##
## 0 27 100.0 100.0
## 1 0 0.0 100.0
##
## 0 27 100.0 100.0
##
## 0 27 100.0 100.0
## 1 0 0.0 100.0
##
## 0 15 55.6 55.6
## 1 12 44.4 100.0
##
## 0 22 81.5 81.5
## 1 5 18.5 100.0
##
## 0 24 88.9 88.9
## 1 3 11.1 100.0
##
## 0 26 96.3 96.3
## 1 1 3.7 100.0
##
## 0 26 96.3 96.3
## 1 1 3.7 100.0
##
##
## p-values
## pApprox pExact
## race 0.224346115 0.335681971
## sex 0.396960524 0.451283920
## Alcoholic Hepatitis 0.235579722 0.270062209
## Alcoholic Cirrhosis 0.005029855 0.003784742
## NAFLD/NASH 0.794836269 0.744506152
## Primary Sclerosing Cholangitis 0.316203824 0.334821352
## Acute Viral Hepatitis 0.840515121 1.000000000
## Chronic Hepatitis B 0.429385165 1.000000000
## Chronic Hepatitis C 0.002120594 0.003400761
## Autoimmune 0.208909105 0.180931478
## Wilson's Disease 0.378684095 0.467969872
## Alpha-1 Antitrypsin NA NA
## Hemachromatosis 0.429385165 1.000000000
## Drug Induced Liver Injury or Toxin 0.429385165 1.000000000
## Budd Chiari NA NA
## Cryptogenic 0.116732808 0.249704864
## Malignancy 0.001414085 0.001812476
## Other 0.398340379 0.366166291
## Dialysis 0.020393763 0.018754199
## Pressers 0.037397761 0.045589684
## Mechanical Ventilation 0.153586594 0.219265323
##
## Standardize mean differences
## average 1 vs 2 1 vs 3 2 vs 3
## race 0.58746504 0.44450333 0.615537269 0.70235452
## sex 0.22799474 0.10025094 0.342997170 0.24073611
## Alcoholic Hepatitis 0.26167960 0.38669460 0.170722265 0.22762195
## Alcoholic Cirrhosis 0.57695699 0.15085094 0.703164058 0.87685597
## NAFLD/NASH 0.11126301 0.07264327 0.166838145 0.09430761
## Primary Sclerosing Cholangitis 0.23496287 0.13187609 0.347035461 0.22597705
## Acute Viral Hepatitis 0.08830602 0.13187609 0.063569415 0.06947254
## Chronic Hepatitis B 0.15097027 0.22645541 0.226455407 0.00000000
## Chronic Hepatitis C 0.39317855 0.00000000 0.589767825 0.58976782
## Autoimmune 0.26873969 0.40269363 0.003525424 0.40000000
## Wilson's Disease 0.22178745 0.32444284 0.063569415 0.27735010
## Alpha-1 Antitrypsin 0.00000000 0.00000000 0.000000000 0.00000000
## Hemachromatosis 0.15097027 0.22645541 0.226455407 0.00000000
## Drug Induced Liver Injury or Toxin 0.15097027 0.22645541 0.226455407 0.00000000
## Budd Chiari 0.00000000 0.00000000 0.000000000 0.00000000
## Cryptogenic 0.33716165 0.31362502 0.226455407 0.47140452
## Malignancy 0.61335070 0.36910674 0.929014264 0.54193110
## Other 0.22203541 0.16724840 0.166838145 0.33201970
## Dialysis 0.50024151 0.31844694 0.757800685 0.42447691
## Pressers 0.35009029 0.46569032 0.521011140 0.06356941
## Mechanical Ventilation 0.26093600 0.38669460 0.326640860 0.06947254
# Continuous variables to report as median [IQR] with the non-normal test.
tableone_skewed <- c("age", "meld_transplant")
# Keep the value returned by print() so the formatted table can be exported.
div_tab1_2 <- print(
  div_tab1_1,
  nonnormal = tableone_skewed,
  formatOptions = list(big.mark = ",")
)
## Stratified by diversity_group
## Low Diversity
## n 40
## race (%)
## American Indian or Alaska Native 0 ( 0.0)
## Asian/Mideast Indian 3 ( 7.5)
## Black/African-American 4 ( 10.0)
## More than one Race 2 ( 5.0)
## Patient Declined 1 ( 2.5)
## Unknown 0 ( 0.0)
## White 30 ( 75.0)
## sex = Male (%) 20 ( 50.0)
## age (median [IQR]) 54.00 [37.75, 62.00]
## meld_transplant (median [IQR]) 31.00 [29.00, 36.00]
## Alcoholic Hepatitis = 1 (%) 5 ( 12.5)
## Alcoholic Cirrhosis = 1 (%) 20 ( 50.0)
## NAFLD/NASH = 1 (%) 5 ( 12.5)
## Primary Sclerosing Cholangitis = 1 (%) 1 ( 2.5)
## Acute Viral Hepatitis = 1 (%) 2 ( 5.0)
## Chronic Hepatitis B = 1 (%) 1 ( 2.5)
## Chronic Hepatitis C = 1 (%) 0 ( 0.0)
## Autoimmune = 1 (%) 3 ( 7.5)
## Wilson's Disease = 1 (%) 2 ( 5.0)
## Alpha-1 Antitrypsin = 0 (%) 40 (100.0)
## Hemachromatosis = 1 (%) 1 ( 2.5)
## Drug Induced Liver Injury or Toxin = 1 (%) 1 ( 2.5)
## Budd Chiari = 0 (%) 40 (100.0)
## Cryptogenic = 1 (%) 1 ( 2.5)
## Malignancy = 1 (%) 3 ( 7.5)
## Other = 1 (%) 5 ( 12.5)
## Dialysis = 1 (%) 17 ( 42.5)
## Pressers = 1 (%) 8 ( 20.0)
## Mechanical Ventilation = 1 (%) 5 ( 12.5)
## Stratified by diversity_group
## Medium Diversity
## n 40
## race (%)
## American Indian or Alaska Native 1 ( 2.5)
## Asian/Mideast Indian 2 ( 5.0)
## Black/African-American 5 ( 12.5)
## More than one Race 6 ( 15.0)
## Patient Declined 1 ( 2.5)
## Unknown 0 ( 0.0)
## White 25 ( 62.5)
## sex = Male (%) 22 ( 55.0)
## age (median [IQR]) 58.00 [46.75, 65.00]
## meld_transplant (median [IQR]) 24.50 [19.75, 33.00]
## Alcoholic Hepatitis = 1 (%) 1 ( 2.5)
## Alcoholic Cirrhosis = 1 (%) 23 ( 57.5)
## NAFLD/NASH = 1 (%) 6 ( 15.0)
## Primary Sclerosing Cholangitis = 1 (%) 2 ( 5.0)
## Acute Viral Hepatitis = 1 (%) 1 ( 2.5)
## Chronic Hepatitis B = 1 (%) 0 ( 0.0)
## Chronic Hepatitis C = 1 (%) 0 ( 0.0)
## Autoimmune = 1 (%) 0 ( 0.0)
## Wilson's Disease = 1 (%) 0 ( 0.0)
## Alpha-1 Antitrypsin = 0 (%) 40 (100.0)
## Hemachromatosis = 1 (%) 0 ( 0.0)
## Drug Induced Liver Injury or Toxin = 1 (%) 0 ( 0.0)
## Budd Chiari = 0 (%) 40 (100.0)
## Cryptogenic = 1 (%) 4 ( 10.0)
## Malignancy = 1 (%) 8 ( 20.0)
## Other = 1 (%) 3 ( 7.5)
## Dialysis = 1 (%) 11 ( 27.5)
## Pressers = 1 (%) 2 ( 5.0)
## Mechanical Ventilation = 1 (%) 1 ( 2.5)
## Stratified by diversity_group
## High Diversity p
## n 27
## race (%) 0.224
## American Indian or Alaska Native 0 ( 0.0)
## Asian/Mideast Indian 3 ( 11.1)
## Black/African-American 2 ( 7.4)
## More than one Race 2 ( 7.4)
## Patient Declined 3 ( 11.1)
## Unknown 2 ( 7.4)
## White 15 ( 55.6)
## sex = Male (%) 18 ( 66.7) 0.397
## age (median [IQR]) 60.00 [39.00, 68.00] 0.358
## meld_transplant (median [IQR]) 18.00 [12.00, 27.50] <0.001
## Alcoholic Hepatitis = 1 (%) 2 ( 7.4) 0.236
## Alcoholic Cirrhosis = 1 (%) 5 ( 18.5) 0.005
## NAFLD/NASH = 1 (%) 5 ( 18.5) 0.795
## Primary Sclerosing Cholangitis = 1 (%) 3 ( 11.1) 0.316
## Acute Viral Hepatitis = 1 (%) 1 ( 3.7) 0.841
## Chronic Hepatitis B = 1 (%) 0 ( 0.0) 0.429
## Chronic Hepatitis C = 1 (%) 4 ( 14.8) 0.002
## Autoimmune = 1 (%) 2 ( 7.4) 0.209
## Wilson's Disease = 1 (%) 1 ( 3.7) 0.379
## Alpha-1 Antitrypsin = 0 (%) 27 (100.0) NA
## Hemachromatosis = 1 (%) 0 ( 0.0) 0.429
## Drug Induced Liver Injury or Toxin = 1 (%) 0 ( 0.0) 0.429
## Budd Chiari = 0 (%) 27 (100.0) NA
## Cryptogenic = 1 (%) 0 ( 0.0) 0.117
## Malignancy = 1 (%) 12 ( 44.4) 0.001
## Other = 1 (%) 5 ( 18.5) 0.398
## Dialysis = 1 (%) 3 ( 11.1) 0.020
## Pressers = 1 (%) 1 ( 3.7) 0.037
## Mechanical Ventilation = 1 (%) 1 ( 3.7) 0.154
## Stratified by diversity_group
## test
## n
## race (%)
## American Indian or Alaska Native
## Asian/Mideast Indian
## Black/African-American
## More than one Race
## Patient Declined
## Unknown
## White
## sex = Male (%)
## age (median [IQR]) nonnorm
## meld_transplant (median [IQR]) nonnorm
## Alcoholic Hepatitis = 1 (%)
## Alcoholic Cirrhosis = 1 (%)
## NAFLD/NASH = 1 (%)
## Primary Sclerosing Cholangitis = 1 (%)
## Acute Viral Hepatitis = 1 (%)
## Chronic Hepatitis B = 1 (%)
## Chronic Hepatitis C = 1 (%)
## Autoimmune = 1 (%)
## Wilson's Disease = 1 (%)
## Alpha-1 Antitrypsin = 0 (%)
## Hemachromatosis = 1 (%)
## Drug Induced Liver Injury or Toxin = 1 (%)
## Budd Chiari = 0 (%)
## Cryptogenic = 1 (%)
## Malignancy = 1 (%)
## Other = 1 (%)
## Dialysis = 1 (%)
## Pressers = 1 (%)
## Mechanical Ventilation = 1 (%)
# Saving then reading in the same data allows for an easy way to adjust
# p-values, since it loads the object as a data frame.
write.csv(
  div_tab1_2,
  "./Results/Demo_Table_1_Diversity.csv",
  row.names = TRUE
)

# Need to adjust p-values and arrange properly, hence the multiple data
# frames below.
# NOTE(review): the printed diversity table contains a "<0.001" p-value; an
# entry like that cannot be parsed as a number after the CSV round trip, so
# confirm it still receives an adjusted p-value.
div_tab1_2_padjust1 <- read.csv("./Results/Demo_Table_1_Diversity.csv") %>%
  dplyr::rename(
    ` ` = X,
    `Low Diversity` = Low.Diversity,
    `Medium Diversity` = Medium.Diversity,
    `High Diversity` = High.Diversity
  )

# Row labels as a factor whose levels follow the order of the table rows.
div_tab1_2_padjust2 <- div_tab1_2_padjust1 %>%
  mutate(
    ` ` = factor(` `, levels = div_tab1_2_padjust1$` `)
  )

div_tab1_2_padjust3 <- div_tab1_2_padjust1 %>%
  # Rows that carry a p-value but no test label are tagged "chi.sq".
  mutate(
    test = ifelse(!is.na(p) & test == "", "chi.sq", test)
  ) %>%
  # Benjamini-Hochberg adjustment, done separately for each test label.
  group_by(test) %>%
  rstatix::adjust_pvalue(p.col = "p", method = "BH") %>%
  ungroup() %>%
  # Sort the rows by the table's row-label order.
  mutate(
    ` ` = factor(` `, div_tab1_2_padjust2$` `)
  ) %>%
  arrange(` `) %>%
  # Show missing p-values as empty strings in the exported table.
  mutate(
    p = ifelse(is.na(p), "", p),
    p.adj = ifelse(is.na(p.adj), "", p.adj)
  )

# Write out the table with the adjusted p-values appended.
write.csv(
  div_tab1_2_padjust3,
  "./Results/Demo_Table_1_Diversity_padjust.csv",
  row.names = FALSE
)
# Percentage of infections # where there is an infection
# within 30 days after transplant and not 0 days before
# Count and percentage of each cultured organism among records flagged as
# bacterial infections with eday between 0 and 30 (presumably days relative
# to transplant -- TODO confirm). Result: one row per organism with `percent`
# and `count`.
cult_percent_infx_all <- peri_criteria_all %>%
  filter(
    bact_infection_present == "Yes",
    between(eday, 0, 30)
  ) %>%
  # One row per patient/sample/day: the row with the largest infx_stool.
  group_by(patientID, sampleID, eday) %>%
  arrange(-infx_stool) %>%
  dplyr::slice(1) %>%
  ungroup() %>%
  select(
    patientID, sampleID, bact_infection_present, infx_stool,
    organism1, micro1.factor
  ) %>%
  distinct() %>%
  # Normalise organism names (whitespace removed, lower case); names that do
  # not match any of the listed fragments are recoded as "Culture Negative".
  mutate(
    organism1 = gsub(x = organism1, pattern = "\\s+", replacement = ""),
    organism1 = str_to_lower(string = organism1),
    organism1 = ifelse(
      grepl(
        x = organism1,
        pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"
      ),
      organism1,
      "Culture Negative"
    )
  ) %>%
  group_by(patientID, sampleID, infx_stool) %>%
  # One logical indicator column per organism of interest.
  # NOTE(review): the patterns "enterobactercloaceae" and
  # "stenotrophmonasmaltophilia" (and the label `Staphylococcus epidermis`)
  # differ from the usual spellings; confirm they match the spelling used in
  # the data, otherwise those organisms are never counted.
  mutate(
    `Enterococcus faecium` = grepl(
      x = organism1, pattern = "enterococcusfaecium", ignore.case = TRUE
    ),
    `Enterococcus faecalis` = grepl(
      x = organism1, pattern = "enterococcusfaecalis", ignore.case = TRUE
    ),
    `Enterococcus avium` = grepl(
      x = organism1, pattern = "enterococcusavium", ignore.case = TRUE
    ),
    `Enterococcus gallinarum` = grepl(
      x = organism1, pattern = "enterococcusgallinarum", ignore.case = TRUE
    ),
    `Klebsiella pneumoniae` = grepl(
      x = organism1, pattern = "klebsiellapneumoniae", ignore.case = TRUE
    ),
    `Enterobacter cloaceae` = grepl(
      x = organism1, pattern = "enterobactercloaceae", ignore.case = TRUE
    ),
    `Escherichia coli` = grepl(
      x = organism1, pattern = "escherichiacoli", ignore.case = TRUE
    ),
    `Citrobacter freundii` = grepl(
      x = organism1, pattern = "citrobacterfreundii", ignore.case = TRUE
    ),
    `Proteus mirabilis` = grepl(
      x = organism1, pattern = "proteusmirabilis", ignore.case = TRUE
    ),
    `Staphylococcus aureus` = grepl(
      x = organism1, pattern = "staphylococcusaureus", ignore.case = TRUE
    ),
    `Staphylococcus epidermis` = grepl(
      x = organism1, pattern = "staphylococcusepidermidis", ignore.case = TRUE
    ),
    `Pseudomonas aeruginosa` = grepl(
      x = organism1, pattern = "pseudomonasaeruginosa", ignore.case = TRUE
    ),
    `Stenotrophmonas maltophilia` = grepl(
      x = organism1, pattern = "stenotrophmonasmaltophilia", ignore.case = TRUE
    ),
    `Helicobacter pylori` = grepl(
      x = organism1, pattern = "helicobacterpylori", ignore.case = TRUE
    ),
    `Clostridium difficile` = grepl(
      x = organism1,
      pattern = "clostridiumdifficile|clostridioidesdifficile",
      ignore.case = TRUE
    ),
    `Bacteroides sp.` = grepl(
      x = organism1, pattern = "bacteroides", ignore.case = TRUE
    ),
    `Culture Negative` = grepl(
      x = organism1, pattern = "Culture Negative", ignore.case = TRUE
    )
  ) %>%
  # Long format: one row per record and organism indicator.
  pivot_longer(
    -c(patientID:micro1.factor),
    names_to = "organisms",
    values_to = "org_presence"
  ) %>%
  mutate(
    organisms = ifelse(
      bact_infection_present == "No",
      "No Bacterial Infection",
      organisms
    ),
    org_presence = ifelse(org_presence == TRUE, 1, 0)
  ) %>%
  # Keep the row(s) with the largest presence flag per sample and organism ...
  group_by(sampleID, infx_stool, bact_infection_present, organisms) %>%
  dplyr::slice_max(org_presence) %>%
  ungroup() %>%
  filter(organisms != "No Bacterial Infection") %>%
  # ... and then per patient and organism.
  group_by(patientID, bact_infection_present, organisms) %>%
  dplyr::slice_max(org_presence) %>%
  ungroup() %>%
  # Only organisms that were actually found.
  filter(org_presence == 1) %>%
  # Count each patient / infx_stool / organism combination once.
  group_by(patientID, infx_stool, organisms) %>%
  dplyr::slice(1) %>%
  group_by(organisms) %>%
  tally() %>%
  ungroup() %>%
  # Denominator: total count across all organisms.
  mutate(total_infections = sum(n)) %>%
  group_by(organisms) %>%
  dplyr::summarize(
    percent = sum(n) / total_infections * 100,
    count = sum(n)
  )
# Location of infections, where there is an infection within 30 days after
# transplant and not 0 days before.
# Result: one row per `key` value with `percent` (of all counted
# patient/location infections) and `count`.
#
# NOTE(review): the match strings "enterobactercloaceae" and
# "stenotrophmonasmaltophilia" are not the usual spellings -- confirm they
# match how organism1 is spelled in the data.
cult_location_infx_all <- peri_criteria_all %>%
  filter(bact_infection_present == "Yes", between(eday, 0, 30)) %>%
  # Keep one record per patient/day/location: the one with the largest infx_stool
  group_by(patientID, eday, key) %>%
  arrange(-infx_stool) %>%
  dplyr::slice(1) %>%
  ungroup() %>%
  select(patientID, bact_infection_present, infx_stool, organism1, key) %>%
  distinct() %>%
  # Normalise organism names (strip whitespace, lower-case); anything outside
  # the keep-list becomes "Culture Negative"
  mutate(
    organism1 = gsub(x = organism1, pattern = "\\s+", replacement = ""),
    organism1 = str_to_lower(string = organism1),
    organism1 = ifelse(
      grepl(x = organism1, pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"),
      organism1,
      "Culture Negative"
    )
  ) %>%
  group_by(patientID, infx_stool) %>%
  # One logical flag column per organism of interest
  mutate(
    `Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium", ignore.case = TRUE),
    `Enterococcus faecalis` = grepl(x = organism1, pattern = "enterococcusfaecalis", ignore.case = TRUE),
    `Enterococcus avium` = grepl(x = organism1, pattern = "enterococcusavium", ignore.case = TRUE),
    `Enterococcus gallinarum` = grepl(x = organism1, pattern = "enterococcusgallinarum", ignore.case = TRUE),
    `Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae", ignore.case = TRUE),
    `Enterobacter cloaceae` = grepl(x = organism1, pattern = "enterobactercloaceae", ignore.case = TRUE),
    `Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli", ignore.case = TRUE),
    `Citrobacter freundii` = grepl(x = organism1, pattern = "citrobacterfreundii", ignore.case = TRUE),
    `Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis", ignore.case = TRUE),
    `Staphylococcus aureus` = grepl(x = organism1, pattern = "staphylococcusaureus", ignore.case = TRUE),
    `Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis", ignore.case = TRUE),
    `Pseudomonas aeruginosa` = grepl(x = organism1, pattern = "pseudomonasaeruginosa", ignore.case = TRUE),
    `Stenotrophmonas maltophilia` = grepl(x = organism1, pattern = "stenotrophmonasmaltophilia", ignore.case = TRUE),
    `Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori", ignore.case = TRUE),
    `Clostridium difficile` = grepl(x = organism1, pattern = "clostridiumdifficile|clostridioidesdifficile", ignore.case = TRUE),
    `Bacteroides sp.` = grepl(x = organism1, pattern = "bacteroides", ignore.case = TRUE),
    `Culture Negative` = grepl(x = organism1, pattern = "Culture Negative", ignore.case = TRUE)
  ) %>%
  # Long format: one row per record x organism flag
  pivot_longer(
    -c(patientID:key),
    names_to = "organisms",
    values_to = "org_presence"
  ) %>%
  mutate(
    organisms = ifelse(bact_infection_present == "No", "No Bacterial Infection", organisms),
    org_presence = ifelse(org_presence == TRUE, 1, 0)
  ) %>%
  group_by(patientID, key, infx_stool, bact_infection_present) %>%
  dplyr::slice_max(org_presence) %>%
  ungroup() %>%
  filter(org_presence == 1) %>%
  filter(organisms != "No Bacterial Infection") %>%
  # One row per patient/infx_stool/location, then count per location
  group_by(patientID, infx_stool, key) %>%
  dplyr::slice(1) %>%
  group_by(key) %>%
  tally() %>%
  ungroup() %>%
  # (A former replace_na() on a `group` column was dropped: no such column
  # exists at this point, so it had no effect.)
  mutate(total_infections = sum(n)) %>%
  group_by(key) %>%
  dplyr::summarize(
    percent = sum(n) / total_infections * 100,
    count = sum(n)
  )
# Type of infection, where there is an infection within 30 days after
# transplant and not 0 days before. Only rows whose `variable` matches
# "anatomy.+" are used.
# Result: one row per diag_cat3 category with `percent` and `count`.
#
# NOTE(review): the match strings "enterobactercloaceae" and
# "stenotrophmonasmaltophilia" are not the usual spellings -- confirm they
# match how organism1 is spelled in the data.
cult_type_infx_all <- peri_criteria_all %>%
  filter(
    bact_infection_present == "Yes",
    grepl(variable, pattern = "anatomy.+"),
    between(eday, 0, 30)
  ) %>%
  # Keep one record per patient/day/diagnosis: the one with the largest infx_stool
  group_by(patientID, eday, diag_cat2) %>%
  arrange(-infx_stool) %>%
  dplyr::slice(1) %>%
  ungroup() %>%
  # Collapse diag_cat2 into broader categories (first matching rule wins;
  # unmatched values pass through), then title-case the result
  mutate(
    diag_cat3 = case_when(
      grepl("abdominal", diag_cat2, ignore.case = TRUE) ~ "Abdominal",
      grepl("vir|bronchitis|covid-19|cmv", diag_cat2, ignore.case = TRUE) ~ "Viral",
      grepl("bacteremia", diag_cat2, ignore.case = TRUE) ~ "Bacteremia",
      grepl("thrush", diag_cat2, ignore.case = TRUE) ~ "Fungal",
      grepl("cholangitis|empyema|panniculitis|peritonitis|latent tb", diag_cat2, ignore.case = TRUE) ~ "Bacterial",
      grepl("cystitis|pyelonephritis", diag_cat2, ignore.case = TRUE) ~ "Urinary Tract Infection",
      grepl("pneumonia", diag_cat2, ignore.case = TRUE) ~ "Pneumonia",
      TRUE ~ diag_cat2
    ),
    diag_cat3 = str_to_title(diag_cat3)
  ) %>%
  select(
    patientID, bact_infection_present, infx_stool, organism1,
    micro1.factor, key, diag_cat3
  ) %>%
  distinct() %>%
  # Normalise organism names (strip whitespace, lower-case); anything outside
  # the keep-list becomes "Culture Negative"
  mutate(
    organism1 = gsub(x = organism1, pattern = "\\s+", replacement = ""),
    organism1 = str_to_lower(string = organism1),
    organism1 = ifelse(
      grepl(x = organism1, pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"),
      organism1,
      "Culture Negative"
    )
  ) %>%
  group_by(patientID, infx_stool) %>%
  # One logical flag column per organism of interest
  mutate(
    `Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium", ignore.case = TRUE),
    `Enterococcus faecalis` = grepl(x = organism1, pattern = "enterococcusfaecalis", ignore.case = TRUE),
    `Enterococcus avium` = grepl(x = organism1, pattern = "enterococcusavium", ignore.case = TRUE),
    `Enterococcus gallinarum` = grepl(x = organism1, pattern = "enterococcusgallinarum", ignore.case = TRUE),
    `Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae", ignore.case = TRUE),
    `Enterobacter cloaceae` = grepl(x = organism1, pattern = "enterobactercloaceae", ignore.case = TRUE),
    `Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli", ignore.case = TRUE),
    `Citrobacter freundii` = grepl(x = organism1, pattern = "citrobacterfreundii", ignore.case = TRUE),
    `Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis", ignore.case = TRUE),
    `Staphylococcus aureus` = grepl(x = organism1, pattern = "staphylococcusaureus", ignore.case = TRUE),
    `Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis", ignore.case = TRUE),
    `Pseudomonas aeruginosa` = grepl(x = organism1, pattern = "pseudomonasaeruginosa", ignore.case = TRUE),
    `Stenotrophmonas maltophilia` = grepl(x = organism1, pattern = "stenotrophmonasmaltophilia", ignore.case = TRUE),
    `Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori", ignore.case = TRUE),
    `Clostridium difficile` = grepl(x = organism1, pattern = "clostridiumdifficile|clostridioidesdifficile", ignore.case = TRUE),
    `Bacteroides sp.` = grepl(x = organism1, pattern = "bacteroides", ignore.case = TRUE),
    `Culture Negative` = grepl(x = organism1, pattern = "Culture Negative", ignore.case = TRUE)
  ) %>%
  # Long format: one row per record x organism flag
  pivot_longer(
    -c(patientID:diag_cat3),
    names_to = "organisms",
    values_to = "org_presence"
  ) %>%
  mutate(
    organisms = ifelse(bact_infection_present == "No", "No Bacterial Infection", organisms),
    org_presence = ifelse(org_presence == TRUE, 1, 0)
  ) %>%
  group_by(patientID, diag_cat3, infx_stool, bact_infection_present) %>%
  dplyr::slice_max(org_presence) %>%
  ungroup() %>%
  filter(org_presence == 1, organisms != "No Bacterial Infection") %>%
  # One row per patient/infx_stool/infection type, then count per type
  group_by(patientID, infx_stool, diag_cat3) %>%
  dplyr::slice(1) %>%
  group_by(diag_cat3) %>%
  tally() %>%
  ungroup() %>%
  mutate(total_infections = sum(n)) %>%
  group_by(diag_cat3) %>%
  dplyr::summarize(
    percent = sum(n) / total_infections * 100,
    count = sum(n)
  )
# Stack the organism, location (key) and infection-type summaries into one
# table; percentages are rounded to two decimals.
culture_tot <- bind_rows(
  cult_percent_infx_all,
  cult_location_infx_all,
  cult_type_infx_all
) %>%
  mutate(percent = round(percent, 2))

# Export the combined culture summary
write.csv(
  x = culture_tot,
  file = "./Results/Supplemental_Table_3.csv",
  row.names = FALSE
)

## Vector of variables to summarize
# Medication columns passed to CreateTableOne() as both `vars` and
# `factorVars`. Spellings must match the labels produced when abx2 is built
# (e.g. "Tobramicin").
abx_vars <- c(
  "Basiliximab",
  "Mycophenolate",
  "Steroid",
  "Systemic Vancomycin",
  "Tacrolimus",
  "Cefepime",
  "Metronidazole",
  "Piperacillin/Tazobactam",
  "Rifaximin",
  "Ceftriaxone",
  "Ciprofloxacin",
  "Gentamicin",
  "Tobramicin",
  "Daptomycin",
  "Meropenem",
  "Oral Vancomycin"
)
# Patient x medication indicator matrix.
# Steps: keep relevant medication rows, label each row as an immunosuppressant
# and/or antibiotic, keep rows inside the allowed time windows, then spread to
# one row per patient with a 0/1 column per medication label (plus
# any_infection from peri_matrix_all).
#
# NOTE(review): the steroid rule "one|ide|solu-cortef" is a plain substring
# match on medication_name, so it also matches names such as "ceftriaxone";
# the summary shows Steroid = 1 for every patient -- confirm this is intended.
abx2 <- abx %>%
  filter(
    grepl(pattern = "basilix|tacro|steroid|mycophenolate|lactulose", medication_name, ignore.case = TRUE) |
      grepl(pattern = "GLUCOCORTICOIDS|steroid", pharm_class, ignore.case = TRUE) |
      grepl(pattern = "steroid", pharm_sub_class, ignore.case = TRUE) |
      grepl("given", mar_action, ignore.case = TRUE)
  ) %>%
  mutate(
    # First matching rule wins; rows matching none get NA
    Immunosuppressants = case_when(
      grepl("basilix", medication_name, ignore.case = TRUE) ~ "Basiliximab",
      grepl("tacro", medication_name, ignore.case = TRUE) ~ "Tacrolimus",
      grepl("one|ide|solu-cortef", medication_name, ignore.case = TRUE) ~ "Steroid",
      grepl("mycophenolate", medication_name, ignore.case = TRUE) ~ "Mycophenolate",
      grepl("lactulose", medication_name, ignore.case = TRUE) ~ "Lactulose"
    ),
    Antibiotics = case_when(
      grepl("rifaximin", medication_name, ignore.case = TRUE) ~ "Rifaximin",
      grepl("lactulose", medication_name, ignore.case = TRUE) ~ "Lactulose",
      grepl("ceftriaxone", medication_name, ignore.case = TRUE) ~ "Ceftriaxone",
      grepl("piperacillin|tazobactam", medication_name, ignore.case = TRUE) ~ "Piperacillin/Tazobactam",
      grepl("cefepime", medication_name, ignore.case = TRUE) ~ "Cefepime",
      grepl("meropenem", medication_name, ignore.case = TRUE) ~ "Meropenem",
      grepl("gentamicin", medication_name, ignore.case = TRUE) ~ "Gentamicin",
      grepl("tobramycin", medication_name, ignore.case = TRUE) ~ "Tobramicin",
      grepl("vancomycin.+oral", medication_name, ignore.case = TRUE) ~ "Oral Vancomycin",
      grepl("vancomycin.+(IV|Intravenous)", medication_name, ignore.case = TRUE) ~ "Systemic Vancomycin",
      grepl("METRONIDAZOLE", medication_name, ignore.case = TRUE) ~ "Metronidazole",
      grepl("DAPTOMYCIN", medication_name, ignore.case = TRUE) ~ "Daptomycin",
      grepl("linezolid", medication_name, ignore.case = TRUE) ~ "Linezolid",
      grepl("fluconazole", medication_name, ignore.case = TRUE) ~ "Fluconazole",
      grepl("micafungin", medication_name, ignore.case = TRUE) ~ "Micafungin",
      grepl("cipro", medication_name, ignore.case = TRUE) &
        !grepl("drop", dose_units, ignore.case = TRUE) ~ "Ciprofloxacin"
    ),
    # Immunosuppressants: days 0..30 after transplant, or any outpatient order.
    # Antibiotics: days -14..1 around transplant.
    action = case_when(
      !is.na(Immunosuppressants) & between(days_transplant, 0, 30) |
        !is.na(Immunosuppressants) & ordering_mode == "Outpatient" ~ "keep",
      !is.na(Antibiotics) & between(days_transplant, -14, 1) ~ "keep",
      TRUE ~ "remove"
    )
  ) %>%
  # Earliest kept row per patient and label combination
  group_by(patientID, Immunosuppressants, Antibiotics) %>%
  arrange(days_transplant) %>%
  filter(action == "keep") %>%
  dplyr::slice(1) %>%
  select(patientID, Immunosuppressants, Antibiotics) %>%
  left_join(
    peri_matrix_all %>%
      select(patientID, any_infection),
    by = "patientID"
  ) %>%
  # Stack both label columns, drop the empty ones, and mark presence with 1
  pivot_longer(
    !c(patientID, any_infection),
    names_to = "variable",
    values_to = "value"
  ) %>%
  drop_na(value) %>%
  mutate(variable = 1) %>%
  # id_cols is named explicitly: passing it by position is deprecated in
  # recent tidyr releases
  pivot_wider(
    id_cols = c(patientID, any_infection),
    names_from = "value",
    values_from = "variable",
    values_fn = min
  ) %>%
  # Medications never seen for a patient become 0
  replace(is.na(.), 0)
# Medication table stratified by any_infection; every variable in abx_vars is
# treated as categorical. The lines starting with `##` below are the recorded
# console output of summary().
abx_tab1_1 <- CreateTableOne(
  data = abx2,
  vars = abx_vars,
  factorVars = abx_vars,
  strata = "any_infection",
  includeNA = FALSE,
  testNonNormal = "wilcox.test"
)
summary(abx_tab1_1)
##
## ### Summary of categorical variables ###
##
## any_infection: 0
## var n miss p.miss level freq percent cum.percent
## Basiliximab 82 0 0.0 0 29 35.4 35.4
## 1 53 64.6 100.0
##
## Mycophenolate 82 0 0.0 0 17 20.7 20.7
## 1 65 79.3 100.0
##
## Steroid 82 0 0.0 1 82 100.0 100.0
##
## Systemic Vancomycin 82 0 0.0 0 37 45.1 45.1
## 1 45 54.9 100.0
##
## Tacrolimus 82 0 0.0 0 1 1.2 1.2
## 1 81 98.8 100.0
##
## Cefepime 82 0 0.0 0 52 63.4 63.4
## 1 30 36.6 100.0
##
## Metronidazole 82 0 0.0 0 46 56.1 56.1
## 1 36 43.9 100.0
##
## Piperacillin/Tazobactam 82 0 0.0 0 12 14.6 14.6
## 1 70 85.4 100.0
##
## Rifaximin 82 0 0.0 0 41 50.0 50.0
## 1 41 50.0 100.0
##
## Ceftriaxone 82 0 0.0 0 64 78.0 78.0
## 1 18 22.0 100.0
##
## Ciprofloxacin 82 0 0.0 0 66 80.5 80.5
## 1 16 19.5 100.0
##
## Gentamicin 82 0 0.0 0 81 98.8 98.8
## 1 1 1.2 100.0
##
## Tobramicin 82 0 0.0 0 78 95.1 95.1
## 1 4 4.9 100.0
##
## Daptomycin 82 0 0.0 0 81 98.8 98.8
## 1 1 1.2 100.0
##
## Meropenem 82 0 0.0 0 78 95.1 95.1
## 1 4 4.9 100.0
##
## Oral Vancomycin 82 0 0.0 0 80 97.6 97.6
## 1 2 2.4 100.0
##
## ------------------------------------------------------------
## any_infection: 1
## var n miss p.miss level freq percent cum.percent
## Basiliximab 25 0 0.0 0 8 32.0 32.0
## 1 17 68.0 100.0
##
## Mycophenolate 25 0 0.0 0 4 16.0 16.0
## 1 21 84.0 100.0
##
## Steroid 25 0 0.0 1 25 100.0 100.0
##
## Systemic Vancomycin 25 0 0.0 0 7 28.0 28.0
## 1 18 72.0 100.0
##
## Tacrolimus 25 0 0.0 0 0 0.0 0.0
## 1 25 100.0 100.0
##
## Cefepime 25 0 0.0 0 13 52.0 52.0
## 1 12 48.0 100.0
##
## Metronidazole 25 0 0.0 0 12 48.0 48.0
## 1 13 52.0 100.0
##
## Piperacillin/Tazobactam 25 0 0.0 0 2 8.0 8.0
## 1 23 92.0 100.0
##
## Rifaximin 25 0 0.0 0 13 52.0 52.0
## 1 12 48.0 100.0
##
## Ceftriaxone 25 0 0.0 0 17 68.0 68.0
## 1 8 32.0 100.0
##
## Ciprofloxacin 25 0 0.0 0 21 84.0 84.0
## 1 4 16.0 100.0
##
## Gentamicin 25 0 0.0 0 24 96.0 96.0
## 1 1 4.0 100.0
##
## Tobramicin 25 0 0.0 0 22 88.0 88.0
## 1 3 12.0 100.0
##
## Daptomycin 25 0 0.0 0 22 88.0 88.0
## 1 3 12.0 100.0
##
## Meropenem 25 0 0.0 0 24 96.0 96.0
## 1 1 4.0 100.0
##
## Oral Vancomycin 25 0 0.0 0 24 96.0 96.0
## 1 1 4.0 100.0
##
##
## p-values
## pApprox pExact
## Basiliximab 0.94452638 0.81451961
## Mycophenolate 0.81509810 0.77620871
## Steroid NA NA
## Systemic Vancomycin 0.19672525 0.16534958
## Tacrolimus 1.00000000 1.00000000
## Cefepime 0.42996989 0.35335166
## Metronidazole 0.62972047 0.50032437
## Piperacillin/Tazobactam 0.60142514 0.51269562
## Rifaximin 1.00000000 1.00000000
## Ceftriaxone 0.44773353 0.30160442
## Ciprofloxacin 0.91929331 0.77859454
## Gentamicin 0.95599591 0.41438900
## Tobramicin 0.42443880 0.35037337
## Daptomycin 0.05938894 0.03899733
## Meropenem 1.00000000 1.00000000
## Oral Vancomycin 1.00000000 0.55382019
##
## Standardize mean differences
## 1 vs 2
## Basiliximab 0.07126120
## Mycophenolate 0.12243021
## Steroid 0.00000000
## Systemic Vancomycin 0.36127548
## Tacrolimus 0.15713484
## Cefepime 0.23261173
## Metronidazole 0.16262229
## Piperacillin/Tazobactam 0.21056770
## Rifaximin 0.04001601
## Ceftriaxone 0.22787388
## Ciprofloxacin 0.09200514
## Gentamicin 0.17507370
## Tobramicin 0.25833951
## Daptomycin 0.44449214
## Meropenem 0.04264158
## Oral Vancomycin 0.08851811
## Stratified by any_infection
## 0 1 p test
## n 82 25
## Basiliximab = 1 (%) 53 ( 64.6) 17 ( 68.0) 0.945
## Mycophenolate = 1 (%) 65 ( 79.3) 21 ( 84.0) 0.815
## Steroid = 1 (%) 82 (100.0) 25 (100.0) NA
## Systemic Vancomycin = 1 (%) 45 ( 54.9) 18 ( 72.0) 0.197
## Tacrolimus = 1 (%) 81 ( 98.8) 25 (100.0) 1.000
## Cefepime = 1 (%) 30 ( 36.6) 12 ( 48.0) 0.430
## Metronidazole = 1 (%) 36 ( 43.9) 13 ( 52.0) 0.630
## Piperacillin/Tazobactam = 1 (%) 70 ( 85.4) 23 ( 92.0) 0.601
## Rifaximin = 1 (%) 41 ( 50.0) 12 ( 48.0) 1.000
## Ceftriaxone = 1 (%) 18 ( 22.0) 8 ( 32.0) 0.448
## Ciprofloxacin = 1 (%) 16 ( 19.5) 4 ( 16.0) 0.919
## Gentamicin = 1 (%) 1 ( 1.2) 1 ( 4.0) 0.956
## Tobramicin = 1 (%) 4 ( 4.9) 3 ( 12.0) 0.424
## Daptomycin = 1 (%) 1 ( 1.2) 3 ( 12.0) 0.059
## Meropenem = 1 (%) 4 ( 4.9) 1 ( 4.0) 1.000
## Oral Vancomycin = 1 (%) 2 ( 2.4) 1 ( 4.0) 1.000
# Capture the printed table so it can be written out. `abx_tab1_2` was used
# below without ever being assigned; this mirrors how abx_div_tab1_2 is built
# from abx_div_tab1_1 for the diversity-stratified table.
abx_tab1_2 <- print(
  abx_tab1_1,
  nonnormal = tableone_skewed,
  formatOptions = list(big.mark = ",")
)
# Saving then reading in the same data allows for an easy way to adjust
# p-values, since it loads the object as a dataframe
write.csv(abx_tab1_2, "./Results/ABX_Table_1.csv", row.names = TRUE)
# Need to adjust pvalues and arrange properly....hence the
# multiple dataframes below
# Step 1: reload the saved table as a data frame and restore readable headers
# (read.csv turned the blank / numeric headers into X, X0 and X1).
abx_tab1_2_padjust1 <- read.csv("./Results/ABX_Table_1.csv") %>%
  dplyr::rename(
    ` ` = X,
    `No Infection` = X0,
    `Bacterial Infection` = X1
  )

# Step 2: same table with the label column as a factor in file order; only
# used below to restore the row order after the grouped adjustment.
abx_tab1_2_padjust2 <- abx_tab1_2_padjust1 %>%
  mutate(` ` = factor(` `, levels = abx_tab1_2_padjust1$` `))

# Step 3: rows that have a p-value but no recorded test are labelled "chi.sq"
# (all others get ""), p-values are BH-adjusted within each test label, rows
# are put back in table order, and NA p / p.adj are blanked for display.
abx_tab1_2_padjust3 <- abx_tab1_2_padjust1 %>%
  mutate(test = ifelse(!is.na(p) & is.na(test), "chi.sq", "")) %>%
  group_by(test) %>%
  rstatix::adjust_pvalue(p.col = "p", method = "BH") %>%
  ungroup() %>%
  mutate(` ` = factor(` `, levels = abx_tab1_2_padjust2$` `)) %>%
  arrange(` `) %>%
  mutate(p = ifelse(is.na(p), "", p)) %>%
  mutate(p.adj = ifelse(is.na(p.adj), "", p.adj))

# Write the table with the adjusted p-values appended
write.csv(
  x = abx_tab1_2_padjust3,
  file = "./Results/ABX_Table_1_padjust.csv",
  row.names = FALSE
)
#### Stratify by diversity ####
# Attach each patient's diversity_group to the medication matrix. patientID is
# extracted from sampleID with the "lt-<digits>" pattern; samples without such
# an ID are dropped from the lookup.
abx2_div <- abx2 %>%
  left_join(
    metaphlan_df_sumry %>%
      mutate(
        patientID = str_extract(string = sampleID, pattern = "lt-[0-9]+")
      ) %>%
      distinct(patientID, diversity_group) %>%
      drop_na(patientID) %>%
      droplevels()
  )

# Same medication table as before, now stratified by diversity_group. The
# lines starting with `##` below are the recorded console output of summary().
abx_div_tab1_1 <- CreateTableOne(
  data = abx2_div,
  vars = abx_vars,
  factorVars = abx_vars,
  strata = "diversity_group",
  includeNA = TRUE,
  testNonNormal = "kruskal.test"
)
summary(abx_div_tab1_1)
##
## ### Summary of categorical variables ###
##
## diversity_group: Low Diversity
## var n miss p.miss level freq percent cum.percent
## Basiliximab 40 0 0.0 0 9 22.5 22.5
## 1 31 77.5 100.0
##
## Mycophenolate 40 0 0.0 0 7 17.5 17.5
## 1 33 82.5 100.0
##
## Steroid 40 0 0.0 1 40 100.0 100.0
##
## Systemic Vancomycin 40 0 0.0 0 7 17.5 17.5
## 1 33 82.5 100.0
##
## Tacrolimus 40 0 0.0 0 0 0.0 0.0
## 1 40 100.0 100.0
##
## Cefepime 40 0 0.0 0 13 32.5 32.5
## 1 27 67.5 100.0
##
## Metronidazole 40 0 0.0 0 11 27.5 27.5
## 1 29 72.5 100.0
##
## Piperacillin/Tazobactam 40 0 0.0 0 8 20.0 20.0
## 1 32 80.0 100.0
##
## Rifaximin 40 0 0.0 0 10 25.0 25.0
## 1 30 75.0 100.0
##
## Ceftriaxone 40 0 0.0 0 21 52.5 52.5
## 1 19 47.5 100.0
##
## Ciprofloxacin 40 0 0.0 0 30 75.0 75.0
## 1 10 25.0 100.0
##
## Gentamicin 40 0 0.0 0 38 95.0 95.0
## 1 2 5.0 100.0
##
## Tobramicin 40 0 0.0 0 37 92.5 92.5
## 1 3 7.5 100.0
##
## Daptomycin 40 0 0.0 0 37 92.5 92.5
## 1 3 7.5 100.0
##
## Meropenem 40 0 0.0 0 36 90.0 90.0
## 1 4 10.0 100.0
##
## Oral Vancomycin 40 0 0.0 0 39 97.5 97.5
## 1 1 2.5 100.0
##
## ------------------------------------------------------------
## diversity_group: Medium Diversity
## var n miss p.miss level freq percent cum.percent
## Basiliximab 40 0 0.0 0 15 37.5 37.5
## 1 25 62.5 100.0
##
## Mycophenolate 40 0 0.0 0 7 17.5 17.5
## 1 33 82.5 100.0
##
## Steroid 40 0 0.0 1 40 100.0 100.0
##
## Systemic Vancomycin 40 0 0.0 0 23 57.5 57.5
## 1 17 42.5 100.0
##
## Tacrolimus 40 0 0.0 0 1 2.5 2.5
## 1 39 97.5 100.0
##
## Cefepime 40 0 0.0 0 30 75.0 75.0
## 1 10 25.0 100.0
##
## Metronidazole 40 0 0.0 0 27 67.5 67.5
## 1 13 32.5 100.0
##
## Piperacillin/Tazobactam 40 0 0.0 0 3 7.5 7.5
## 1 37 92.5 100.0
##
## Rifaximin 40 0 0.0 0 21 52.5 52.5
## 1 19 47.5 100.0
##
## Ceftriaxone 40 0 0.0 0 35 87.5 87.5
## 1 5 12.5 100.0
##
## Ciprofloxacin 40 0 0.0 0 32 80.0 80.0
## 1 8 20.0 100.0
##
## Gentamicin 40 0 0.0 0 40 100.0 100.0
## 1 0 0.0 100.0
##
## Tobramicin 40 0 0.0 0 37 92.5 92.5
## 1 3 7.5 100.0
##
## Daptomycin 40 0 0.0 0 39 97.5 97.5
## 1 1 2.5 100.0
##
## Meropenem 40 0 0.0 0 40 100.0 100.0
## 1 0 0.0 100.0
##
## Oral Vancomycin 40 0 0.0 0 39 97.5 97.5
## 1 1 2.5 100.0
##
## ------------------------------------------------------------
## diversity_group: High Diversity
## var n miss p.miss level freq percent cum.percent
## Basiliximab 27 0 0.0 0 13 48.1 48.1
## 1 14 51.9 100.0
##
## Mycophenolate 27 0 0.0 0 7 25.9 25.9
## 1 20 74.1 100.0
##
## Steroid 27 0 0.0 1 27 100.0 100.0
##
## Systemic Vancomycin 27 0 0.0 0 14 51.9 51.9
## 1 13 48.1 100.0
##
## Tacrolimus 27 0 0.0 0 0 0.0 0.0
## 1 27 100.0 100.0
##
## Cefepime 27 0 0.0 0 22 81.5 81.5
## 1 5 18.5 100.0
##
## Metronidazole 27 0 0.0 0 20 74.1 74.1
## 1 7 25.9 100.0
##
## Piperacillin/Tazobactam 27 0 0.0 0 3 11.1 11.1
## 1 24 88.9 100.0
##
## Rifaximin 27 0 0.0 0 23 85.2 85.2
## 1 4 14.8 100.0
##
## Ceftriaxone 27 0 0.0 0 25 92.6 92.6
## 1 2 7.4 100.0
##
## Ciprofloxacin 27 0 0.0 0 25 92.6 92.6
## 1 2 7.4 100.0
##
## Gentamicin 27 0 0.0 0 27 100.0 100.0
## 1 0 0.0 100.0
##
## Tobramicin 27 0 0.0 0 26 96.3 96.3
## 1 1 3.7 100.0
##
## Daptomycin 27 0 0.0 0 27 100.0 100.0
## 1 0 0.0 100.0
##
## Meropenem 27 0 0.0 0 26 96.3 96.3
## 1 1 3.7 100.0
##
## Oral Vancomycin 27 0 0.0 0 26 96.3 96.3
## 1 1 3.7 100.0
##
##
## p-values
## pApprox pExact
## Basiliximab 8.508421e-02 8.535605e-02
## Mycophenolate 6.349040e-01 6.458911e-01
## Steroid NA NA
## Systemic Vancomycin 5.716137e-04 4.324973e-04
## Tacrolimus 4.293852e-01 1.000000e+00
## Cefepime 1.979062e-05 2.581953e-05
## Metronidazole 8.987941e-05 8.806651e-05
## Piperacillin/Tazobactam 2.379309e-01 2.752945e-01
## Rifaximin 8.036339e-06 4.082253e-06
## Ceftriaxone 7.787926e-05 1.113622e-04
## Ciprofloxacin 1.868755e-01 1.767183e-01
## Gentamicin 1.814265e-01 3.369776e-01
## Tobramicin 7.882663e-01 7.925334e-01
## Daptomycin 2.476016e-01 4.468096e-01
## Meropenem 1.019801e-01 1.156897e-01
## Oral Vancomycin 9.477494e-01 1.000000e+00
##
## Standardize mean differences
## average 1 vs 2 1 vs 3 2 vs 3
## Basiliximab 0.36842303 0.3318008 0.55702112 0.21644722
## Mycophenolate 0.13696235 0.0000000 0.20544352 0.20544352
## Steroid 0.00000000 0.0000000 0.00000000 0.00000000
## Systemic Vancomycin 0.59828195 0.9072768 0.77392588 0.11364321
## Tacrolimus 0.15097027 0.2264554 0.00000000 0.22645541
## Cefepime 0.74607472 0.9422658 1.13838621 0.15757216
## Metronidazole 0.69063384 0.8741734 1.05278172 0.14494641
## Piperacillin/Tazobactam 0.24692356 0.3691067 0.24712083 0.12454313
## Rifaximin 0.95410379 0.5883952 1.51966251 0.75425370
## Ceftriaxone 0.66754595 0.8263939 1.00552172 0.17072227
## Ciprofloxacin 0.32802581 0.1199520 0.49164428 0.37248114
## Gentamicin 0.21629523 0.3244428 0.32444284 0.00000000
## Tobramicin 0.11043478 0.0000000 0.16565217 0.16565217
## Daptomycin 0.28669638 0.2309401 0.40269363 0.22645541
## Meropenem 0.33331297 0.4714045 0.25118429 0.27735010
## Oral Vancomycin 0.04631503 0.0000000 0.06947254 0.06947254
# Capture the printed diversity-stratified table; the `##` lines that follow
# are its recorded console output.
abx_div_tab1_2 <- print(
  abx_div_tab1_1,
  nonnormal = tableone_skewed,
  formatOptions = list(big.mark = ",")
)
## Stratified by diversity_group
## Low Diversity Medium Diversity High Diversity
## n 40 40 27
## Basiliximab = 1 (%) 31 ( 77.5) 25 ( 62.5) 14 ( 51.9)
## Mycophenolate = 1 (%) 33 ( 82.5) 33 ( 82.5) 20 ( 74.1)
## Steroid = 1 (%) 40 (100.0) 40 (100.0) 27 (100.0)
## Systemic Vancomycin = 1 (%) 33 ( 82.5) 17 ( 42.5) 13 ( 48.1)
## Tacrolimus = 1 (%) 40 (100.0) 39 ( 97.5) 27 (100.0)
## Cefepime = 1 (%) 27 ( 67.5) 10 ( 25.0) 5 ( 18.5)
## Metronidazole = 1 (%) 29 ( 72.5) 13 ( 32.5) 7 ( 25.9)
## Piperacillin/Tazobactam = 1 (%) 32 ( 80.0) 37 ( 92.5) 24 ( 88.9)
## Rifaximin = 1 (%) 30 ( 75.0) 19 ( 47.5) 4 ( 14.8)
## Ceftriaxone = 1 (%) 19 ( 47.5) 5 ( 12.5) 2 ( 7.4)
## Ciprofloxacin = 1 (%) 10 ( 25.0) 8 ( 20.0) 2 ( 7.4)
## Gentamicin = 1 (%) 2 ( 5.0) 0 ( 0.0) 0 ( 0.0)
## Tobramicin = 1 (%) 3 ( 7.5) 3 ( 7.5) 1 ( 3.7)
## Daptomycin = 1 (%) 3 ( 7.5) 1 ( 2.5) 0 ( 0.0)
## Meropenem = 1 (%) 4 ( 10.0) 0 ( 0.0) 1 ( 3.7)
## Oral Vancomycin = 1 (%) 1 ( 2.5) 1 ( 2.5) 1 ( 3.7)
## Stratified by diversity_group
## p test
## n
## Basiliximab = 1 (%) 0.085
## Mycophenolate = 1 (%) 0.635
## Steroid = 1 (%) NA
## Systemic Vancomycin = 1 (%) 0.001
## Tacrolimus = 1 (%) 0.429
## Cefepime = 1 (%) <0.001
## Metronidazole = 1 (%) <0.001
## Piperacillin/Tazobactam = 1 (%) 0.238
## Rifaximin = 1 (%) <0.001
## Ceftriaxone = 1 (%) <0.001
## Ciprofloxacin = 1 (%) 0.187
## Gentamicin = 1 (%) 0.181
## Tobramicin = 1 (%) 0.788
## Daptomycin = 1 (%) 0.248
## Meropenem = 1 (%) 0.102
## Oral Vancomycin = 1 (%) 0.948
# Saving then reading in the same data allows for an easy way to adjust
# p-values, since it loads the object as a dataframe
write.csv(
  x = abx_div_tab1_2,
  file = "./Results/ABX_Table_1_Diversity.csv",
  row.names = TRUE
)
# Need to adjust pvalues and arrange properly....hence the
# multiple dataframes below
# Step 1: reload the saved table as a data frame and restore readable headers
# (read.csv replaced the spaces in the strata names with dots).
# NOTE(review): the printed table shows some p-values as text such as
# "<0.001"; confirm these are read back in a form adjust_pvalue() can use.
abx_div_tab1_2_padjust1 <- read.csv("./Results/ABX_Table_1_Diversity.csv") %>%
  dplyr::rename(
    ` ` = X,
    `Low Diversity` = Low.Diversity,
    `Medium Diversity` = Medium.Diversity,
    `High Diversity` = High.Diversity
  )

# Step 2: same table with the label column as a factor in file order; only
# used below to restore the row order after the grouped adjustment.
abx_div_tab1_2_padjust2 <- abx_div_tab1_2_padjust1 %>%
  mutate(` ` = factor(` `, levels = abx_div_tab1_2_padjust1$` `))

# Step 3: rows that have a p-value but no recorded test are labelled "chi.sq",
# p-values are BH-adjusted within each test label, rows are put back in table
# order, and NA p / p.adj are blanked for display.
abx_div_tab1_2_padjust3 <- abx_div_tab1_2_padjust1 %>%
  mutate(test = ifelse(!is.na(p) & is.na(test), "chi.sq", test)) %>%
  group_by(test) %>%
  rstatix::adjust_pvalue(p.col = "p", method = "BH") %>%
  ungroup() %>%
  mutate(` ` = factor(` `, levels = abx_div_tab1_2_padjust2$` `)) %>%
  arrange(` `) %>%
  mutate(p = ifelse(is.na(p), "", p)) %>%
  mutate(p.adj = ifelse(is.na(p.adj), "", p.adj))
# Write the diversity-stratified medication table with the adjusted p-values
write.csv(
  abx_div_tab1_2_padjust3,
  "./Results/ABX_Table_1_Diversity_padjust.csv",
  row.names = FALSE
)

# Join the card_dict annotations onto card2. This statement was previously
# fused onto the end of the write.csv() line, which is a syntax error.
div_card_tot <- card2 %>%
  left_join(card_dict)
# Column (sample) order for the CARD heatmap: take the first heatmap_data row
# of every patient whose ID starts with "lt", attach alpha_shannon, and sort by
# db and then by increasing Shannon diversity.
card_heatmap_column_order <- heatmap_data %>%
  ungroup() %>%
  filter(grepl(pattern = "^lt", x = patientID)) %>%
  group_by(patientID) %>%
  dplyr::slice(1) %>%
  ungroup() %>%
  left_join(alpha_shannon) %>%
  # arrange() ignores grouping, so no group_by(db) is needed for this sort
  arrange(db, Shannon) %>%
  pull(sampleID) %>%
  unique()
# AMRGeneFamily level
# Long table of rpkm per sample x AMR gene family for glycopeptide /
# oxazolidinone drug classes, limited to heatmap samples, zero-filled for
# sample/family pairs that were not observed, with diversity_group attached.
# The result stays grouped by AMRGeneFamily.
div_card_family_tot_0 <-
  div_card_tot %>%
  filter(sampleID %in% card_heatmap_column_order) %>%
  filter(grepl(DrugClass, pattern = "glycopeptide|oxazolidinone")) %>%
  # Disabled alternative filter: !grepl(ResistanceMechanism, pattern = "target")
  # One row per sample/family (rpkm_mean is constant within a group, so this
  # keeps the first row of each group)
  group_by(sampleID, AMRGeneFamily) %>%
  mutate(rpkm_mean = mean(rpkm)) %>%
  dplyr::slice_max(rpkm_mean, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  # Wide then long again so every sample gets a row for every family
  pivot_wider(
    id_cols = c(sampleID, rpkm_mean),
    names_from = "AMRGeneFamily",
    values_from = "rpkm",
    values_fill = 0
  ) %>%
  mutate(across(where(is.numeric), ~ replace(., is.na(.), 0))) %>%
  select(where(~ any(. != 0))) %>%
  pivot_longer(
    -c(sampleID, rpkm_mean),
    names_to = "AMRGeneFamily",
    values_to = "rpkm"
  ) %>%
  group_by(sampleID, AMRGeneFamily) %>%
  slice_max(rpkm, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  left_join(
    metaphlan_df_sumry %>%
      select(sampleID, diversity_group)
  ) %>%
  drop_na(diversity_group, AMRGeneFamily) %>%
  # Drop families that are zero in every remaining sample
  group_by(AMRGeneFamily) %>%
  filter(any(rpkm != 0))
# Disabled follow-up steps, kept for reference:
# %>% ungroup() %>% distinct(sampleID)
# group_by(AMRGeneFamily, diversity_group) %>%
# filter(n_distinct(rpkm) > 1) # Only keep samples with more than 2 unique values (helps remove the instance where all observations are 0 except 1)
# Kruskal-Wallis test of rpkm across the three diversity groups, run once per
# AMRGeneFamily, followed by BH adjustment of the resulting p-values.
div_card_family_tot_0_stats <-
  div_card_family_tot_0 %>%
  ungroup() %>%
  drop_na() %>%
  mutate(
    diversity_group = factor(
      diversity_group,
      levels = c("Low Diversity", "Medium Diversity", "High Diversity")
    )
  ) %>%
  group_by(AMRGeneFamily) %>%
  rstatix::kruskal_test(rpkm ~ diversity_group) %>%
  rstatix::adjust_pvalue(method = "BH")
# Disabled significance filters, kept for reference:
# filter(p <= 0.05)
# filter(p.adj <= 0.05)
# Sample x AMRGeneFamily table of binned rpkm labels for the heatmap
# (rownames = sampleID, one column per family present in the stats table).
#
# NOTE(review): the filter below ORs the drug-class test with
# `sampleID %in% card_heatmap_column_order`, which is true for every row here
# (the rows start from that vector), so no drug-class filtering happens at
# this step; families are restricted later by the right_join -- confirm this
# is intended.
div_card_family_tot_0_mat <-
  card_heatmap_column_order %>%
  as.data.frame() %>%
  dplyr::rename("sampleID" = ".") %>%
  left_join(div_card_tot) %>%
  filter(
    grepl(DrugClass, pattern = "glycopeptide|oxazolidinone") |
      sampleID %in% card_heatmap_column_order
  ) %>%
  group_by(sampleID, AMRGeneFamily) %>%
  mutate(rpkm_mean = mean(rpkm)) %>%
  dplyr::slice_max(rpkm_mean, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  # Bin rpkm into display categories. between() is inclusive, so the first
  # matching rule decides boundary values. Missing rpkm is kept missing here
  # (and zero-filled below) instead of falling through to "100+".
  mutate(
    rpkm = case_when(
      is.na(rpkm) ~ NA_character_,
      rpkm == 0 ~ "0",
      between(rpkm, 0, 10) ~ "0 - 10",
      between(rpkm, 10, 50) ~ "10 - 50",
      between(rpkm, 50, 100) ~ "50 - 100",
      TRUE ~ "100+"
    )
  ) %>%
  left_join(
    metaphlan_df_sumry %>%
      select(sampleID, diversity_group)
  ) %>%
  # Wide then long again so every sample gets a row for every family;
  # unobserved combinations become "0"
  pivot_wider(
    id_cols = c(sampleID, diversity_group),
    names_from = "AMRGeneFamily",
    values_from = "rpkm"
  ) %>%
  mutate(across(where(is.character), ~ replace(., is.na(.), "0"))) %>%
  pivot_longer(
    !c(sampleID, diversity_group),
    names_to = "AMRGeneFamily",
    values_to = "rpkm"
  ) %>%
  group_by(sampleID, AMRGeneFamily) %>%
  dplyr::slice_max(diversity_group, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  # Keep only the families that were tested
  right_join(
    div_card_family_tot_0_stats %>%
      select(AMRGeneFamily)
  ) %>%
  pivot_wider(
    id_cols = sampleID,
    names_from = "AMRGeneFamily",
    values_from = "rpkm"
  ) %>%
  column_to_rownames(var = "sampleID")
# Create labels
# Factor of diversity groups, one entry per row (sample) of
# div_card_family_tot_0_mat; used as the heatmap's column_split.
# NOTE(review): the heatmap relies on this vector being in the same sample
# order as the matrix rows -- verify after any change to the steps below.
div_card_family_heatmap_labels <-
  div_card_family_tot_0_mat %>%
  rownames_to_column(var = "sampleID") %>%
  left_join(
    metaphlan_df_sumry %>%
      select(sampleID, diversity_group)
  ) %>%
  # One diversity group per sample
  group_by(sampleID) %>%
  dplyr::slice_max(diversity_group, n = 1, with_ties = FALSE) %>%
  right_join(
    div_card_family_tot_0_mat %>%
      rownames_to_column(var = "sampleID")
  ) %>%
  select(diversity_group) %>%
  mutate(
    diversity_group = factor(
      diversity_group,
      levels = c("Low Diversity", "Medium Diversity", "High Diversity")
    )
  ) %>%
  pull(diversity_group)
# Create groupings of CARD classes
# One row per AMRGeneFamily with its DrugClass (first card_dict entry for the
# family), a simplified DrugClass2 label used as the heatmap's row_split, and
# the test statistics; rows are sorted by adjusted p-value.
# NOTE(review): the heatmap relies on this row order matching the p.adj-sorted
# matrix rows; families with tied p.adj could be ordered differently -- verify.
div_card_family_heatmap_groups <-
  div_card_family_tot_0_mat %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column(var = "AMRGeneFamily") %>%
  left_join(
    div_card_family_tot_0_stats %>%
      select(AMRGeneFamily, p.adj)
  ) %>%
  arrange(p.adj) %>%
  select(-p.adj) %>%
  left_join(
    card_dict %>%
      select(AMRGeneFamily, DrugClass)
  ) %>%
  select(AMRGeneFamily, DrugClass) %>%
  # First matching rule wins; anything else is "Multi-Drug"
  mutate(
    DrugClass2 = case_when(
      grepl("carbapenem|penam", DrugClass) ~ "Carbapenem",
      DrugClass == "glycopeptide antibiotic" ~ "Vancomycin",
      grepl("oxazolidinone antibiotic", DrugClass) ~ "Linezolid",
      grepl("fluoroquinolone", DrugClass) ~ "Fluoroquinolone",
      TRUE ~ "Multi-Drug"
    )
  ) %>%
  group_by(AMRGeneFamily) %>%
  dplyr::slice(1) %>%
  left_join(div_card_family_tot_0_stats) %>%
  arrange(p.adj)
# Create row annotation of CARD classes
# Data frame with rownames = AMRGeneFamily and one factor column,
# `Significance`, holding the adjusted p-value tier; rows are sorted by p.adj
# and limited to families present in card_dict.
div_card_family_heatmap_signif <-
  div_card_family_tot_0_mat %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column(var = "AMRGeneFamily") %>%
  left_join(div_card_family_tot_0_stats) %>%
  arrange(p.adj) %>%
  mutate(
    # case_when() uses the first matching rule, so the strictest threshold
    # must come first; previously `p.adj <= 0.05` preceded the stricter tiers
    # and made them unreachable.
    # NOTE(review): the "p < 0.0001" label is attached to the 0.001 threshold;
    # the label text is kept because the colour map is keyed on it -- confirm
    # which of the two was intended.
    p.adj = case_when(
      p.adj <= 0.001 ~ "p < 0.0001",
      p.adj <= 0.01 ~ "p < 0.01",
      p.adj <= 0.05 ~ "p < 0.05",
      p.adj > 0.05 ~ "NS"
    ),
    p.adj = as.factor(p.adj)
  ) %>%
  # (A per-row `col` colour column used to be computed here but was dropped by
  # this select() and never used.)
  select(AMRGeneFamily, Significance = p.adj) %>%
  inner_join(
    card_dict %>%
      select(AMRGeneFamily) %>%
      group_by(AMRGeneFamily) %>%
      dplyr::slice(1)
  ) %>%
  column_to_rownames(var = "AMRGeneFamily")
# Colour for each significance label (names must match the Significance levels)
pcol <- c(
  "NS" = "gray80",
  "p < 0.05" = "#98e2fa",
  "p < 0.01" = "#44cffc",
  "p < 0.0001" = "#03befc"
)
# Heatmap of binned rpkm: rows = AMR gene families (sorted by adjusted
# p-value, split by DrugClass2), columns = samples (in
# card_heatmap_column_order, split by diversity group), with a significance
# annotation on the right.
# NOTE(review): column_split, row_split and row_order come from separately
# built objects; they are assumed to line up with this matrix's columns and
# rows -- verify if any of those objects change.
gg_div_card_family_heatmap <-
  div_card_family_tot_0_mat %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column(var = "AMRGeneFamily") %>%
  left_join(
    div_card_family_tot_0_stats %>%
      select(AMRGeneFamily, p.adj)
  ) %>%
  arrange(p.adj) %>%
  select(-p.adj) %>%
  column_to_rownames(var = "AMRGeneFamily") %>%
  as.matrix() %>%
  Heatmap(
    name = "RPKM",
    # Discrete colour per rpkm bin
    col = c(
      "0" = "#f0f4f5",
      "0 - 10" = "#edb4de",
      "10 - 50" = "#db8cc6",
      "50 - 100" = "#c96bb0",
      "100+" = "#9c3380"
    ),
    rect_gp = gpar(col = "black", lwd = 0.2),
    # Columns (samples)
    column_names_gp = grid::gpar(fontsize = 8),
    column_gap = unit(6, "mm"),
    column_split = div_card_family_heatmap_labels,
    column_order = card_heatmap_column_order,
    column_title_gp = gpar(fontsize = 14),
    column_title_rot = 0,
    cluster_columns = FALSE,
    show_column_names = FALSE,
    show_column_dend = FALSE,
    # Rows (AMR gene families)
    row_names_gp = gpar(fontsize = 6),
    row_gap = unit(3.5, "mm"),
    row_names_side = c("left"),
    row_order = rownames(div_card_family_heatmap_signif),
    row_split = div_card_family_heatmap_groups$DrugClass2,
    row_title_rot = 0,
    cluster_rows = FALSE,
    show_row_dend = FALSE,
    right_annotation = rowAnnotation(
      df = div_card_family_heatmap_signif,
      show_annotation_name = FALSE,
      col = list(Significance = pcol)
    )
    # row_dend_width = unit(4, "in"),
    # heatmap_width = unit(12, "in")
  )
gg_div_card_family_heatmappdf(file = "./Results/Diversity_CARD.pdf", height = 4, width = 12)
gg_div_card_family_heatmap
dev.off()## quartz_off_screen
## 2
# Attach the CARD dictionary to every CARD hit (natural join on shared columns).
ecoc_card_tot <- left_join(card2, card_dict)
# Column order for the Enterococcus heatmap: unique sample IDs from
# peri_matrix_all, sorted by ascending Enterococcus relative abundance.
ecoc_card_heatmap_column_order <-
  peri_matrix_all %>%
  ungroup() %>%
  arrange(enterococcus_rel_abundance) %>%
  distinct(sampleID) %>%
  pull(sampleID)
# Long table of RPKM per sample and AMR gene family, restricted to drug classes
# matching "glycopeptide", with each sample tagged "Expansion" / "No Expansion"
# by comparing Enterococcus relative abundance against the second entry of
# optimal_cutpoint_rel$optimal_cutpoint. The result stays grouped by
# AMRGeneFamily.
ecoc_card_family_tot_0 <-
  ecoc_card_tot %>%
  filter(
    sampleID %in% ecoc_card_heatmap_column_order,
    grepl(DrugClass, pattern = "glycopeptide")
  ) %>%
  # One row per sample/family: the row carrying the (group-constant) mean RPKM.
  group_by(sampleID, AMRGeneFamily) %>%
  mutate(rpkm_mean = mean(rpkm)) %>%
  dplyr::slice_max(rpkm_mean, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  # Round-trip through wide format so absent sample/family pairs become 0 and
  # columns that are zero everywhere are dropped.
  pivot_wider(
    id_cols = c(sampleID, rpkm_mean),
    names_from = "AMRGeneFamily",
    values_from = "rpkm",
    values_fill = 0
  ) %>%
  mutate(across(where(is.numeric), ~ replace(.x, is.na(.x), 0))) %>%
  select(where(~ any(. != 0))) %>%
  pivot_longer(
    -c(sampleID, rpkm_mean),
    names_to = "AMRGeneFamily",
    values_to = "rpkm"
  ) %>%
  group_by(sampleID, AMRGeneFamily) %>%
  slice_max(rpkm, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  # Attach the expansion label; right_join keeps every peri_matrix_all row.
  right_join(
    peri_matrix_all %>%
      mutate(
        domination = case_when(
          enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2] ~ "Expansion",
          TRUE ~ "No Expansion"
        )
      ) %>%
      select(sampleID, domination)
  ) %>%
  drop_na(domination, AMRGeneFamily) %>%
  # Keep only families with at least one non-zero RPKM.
  group_by(AMRGeneFamily) %>%
  filter(any(rpkm != 0))
# Per-family Wilcoxon test of RPKM between "No Expansion" and "Expansion",
# followed by Benjamini-Hochberg adjustment. No significance filter is applied
# here; all tested families are kept.
ecoc_card_family_tot_0_stats <-
  ecoc_card_family_tot_0 %>%
  ungroup() %>%
  drop_na() %>%
  mutate(
    domination = factor(domination, levels = c("No Expansion", "Expansion"))
  ) %>%
  group_by(AMRGeneFamily) %>%
  rstatix::wilcox_test(rpkm ~ domination) %>%
  rstatix::adjust_pvalue(method = "BH")
# Sample x AMR-gene-family table of binned RPKM labels for the Enterococcus
# heatmap (samples as row names), limited to the families that were tested in
# ecoc_card_family_tot_0_stats.
ecoc_card_family_tot_0_mat <-
  ecoc_card_heatmap_column_order %>%
  as.data.frame() %>%
  dplyr::rename("sampleID" = ".") %>%
  left_join(ecoc_card_tot) %>%
  # NOTE(review): every row originates from ecoc_card_heatmap_column_order, so
  # the right-hand side of `|` is always TRUE and this filter keeps all rows.
  filter(
    grepl(DrugClass, pattern = "glycopeptide|oxazolidinone") |
      sampleID %in% ecoc_card_heatmap_column_order
  ) %>%
  group_by(sampleID, AMRGeneFamily) %>%
  mutate(rpkm_mean = mean(rpkm)) %>%
  dplyr::slice_max(rpkm_mean, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  # Bin RPKM into the discrete labels used as heatmap colours. Branch order
  # resolves the shared boundaries; NA RPKM falls through to "100+".
  mutate(
    rpkm = case_when(
      rpkm == 0 ~ "0",
      between(rpkm, 0, 10) ~ "0 - 10",
      between(rpkm, 10, 50) ~ "10 - 50",
      between(rpkm, 50, 100) ~ "50 - 100",
      TRUE ~ "100+"
    )
  ) %>%
  left_join(
    peri_matrix_all %>%
      mutate(
        domination = case_when(
          enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2] ~ "Expansion",
          TRUE ~ "No Expansion"
        )
      ) %>%
      select(sampleID, domination)
  ) %>%
  # Wide/long round trip: missing sample/family combinations become "0".
  pivot_wider(
    id_cols = c(sampleID, domination),
    names_from = "AMRGeneFamily",
    values_from = "rpkm"
  ) %>%
  mutate(across(where(is.character), ~ replace(.x, is.na(.x), "0"))) %>%
  pivot_longer(
    !c(sampleID, domination),
    names_to = "AMRGeneFamily",
    values_to = "rpkm"
  ) %>%
  group_by(sampleID, AMRGeneFamily) %>%
  dplyr::slice_max(domination, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  right_join(ecoc_card_family_tot_0_stats %>% select(AMRGeneFamily)) %>%
  pivot_wider(
    id_cols = sampleID,
    names_from = "AMRGeneFamily",
    values_from = "rpkm"
  ) %>%
  column_to_rownames(var = "sampleID")
# Column-split factor for the Enterococcus heatmap: one "No Expansion" /
# "Expansion" label per row (sample) of ecoc_card_family_tot_0_mat.
ecoc_card_family_heatmap_labels <-
  ecoc_card_family_tot_0_mat %>%
  rownames_to_column(var = "sampleID") %>%
  left_join(
    peri_matrix_all %>%
      mutate(
        domination = case_when(
          enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2] ~ "Expansion",
          TRUE ~ "No Expansion"
        )
      ) %>%
      select(sampleID, domination)
  ) %>%
  # Collapse to a single label per sample.
  group_by(sampleID) %>%
  dplyr::slice_max(domination, n = 1, with_ties = FALSE) %>%
  right_join(
    ecoc_card_family_tot_0_mat %>%
      rownames_to_column(var = "sampleID")
  ) %>%
  select(domination) %>%
  mutate(
    domination = factor(domination, levels = c("No Expansion", "Expansion"))
  ) %>%
  pull(domination)
# Row-split lookup for the Enterococcus heatmap: one row per AMR gene family
# with a simplified drug-class label (DrugClass2) plus the test statistics,
# sorted by ascending adjusted p-value.
ecoc_card_family_heatmap_groups <-
  ecoc_card_family_tot_0_mat %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column(var = "AMRGeneFamily") %>%
  left_join(ecoc_card_family_tot_0_stats %>% select(AMRGeneFamily, p.adj)) %>%
  arrange(p.adj) %>%
  select(-p.adj) %>%
  left_join(card_dict %>% select(AMRGeneFamily, DrugClass)) %>%
  select(AMRGeneFamily, DrugClass) %>%
  # Map the CARD drug class onto the display groups; first match wins.
  mutate(
    DrugClass2 = case_when(
      grepl(DrugClass, pattern = "carbapenem|penam") ~ "Carbapenem",
      DrugClass == "glycopeptide antibiotic" ~ "Vancomycin",
      grepl(DrugClass, pattern = "oxazolidinone antibiotic") ~ "Linezolid",
      grepl(DrugClass, pattern = "fluoroquinolone") ~ "Fluoroquinolone",
      TRUE ~ "Multi-Drug"
    )
  ) %>%
  # The dictionary join can yield several rows per family; keep the first.
  group_by(AMRGeneFamily) %>%
  dplyr::slice(1) %>%
  left_join(ecoc_card_family_tot_0_stats) %>%
  arrange(p.adj)
# Row annotation for the Enterococcus CARD heatmap: one row per AMR gene family
# (row names), with the BH-adjusted p-value binned into a `Significance`
# factor. Rows are sorted by ascending adjusted p-value.
ecoc_card_family_heatmap_signif <-
  ecoc_card_family_tot_0_mat %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column(var = "AMRGeneFamily") %>%
  left_join(ecoc_card_family_tot_0_stats) %>%
  arrange(p.adj) %>%
  mutate(
    # case_when() returns the first matching branch, so the strictest cutoff
    # must be tested first; otherwise every p.adj <= 0.05 lands in "p < 0.05"
    # and the stricter bins are never reached.
    # NOTE(review): the label "p < 0.0001" is paired with a 0.001 cutoff. The
    # label strings are kept unchanged because the annotation colour vector
    # (`pcol`) is keyed on them -- confirm which value was intended.
    p.adj = case_when(
      p.adj <= 0.001 ~ "p < 0.0001",
      p.adj <= 0.01 ~ "p < 0.01",
      p.adj <= 0.05 ~ "p < 0.05",
      p.adj > 0.05 ~ "NS"
    ),
    p.adj = as.factor(p.adj)
  ) %>%
  select(AMRGeneFamily, Significance = p.adj) %>%
  # Keep only families present in the CARD dictionary (one row per family).
  inner_join(
    card_dict %>%
      select(AMRGeneFamily) %>%
      group_by(AMRGeneFamily) %>%
      dplyr::slice(1)
  ) %>%
  column_to_rownames(var = "AMRGeneFamily")
# Enterococcus-expansion CARD heatmap: AMR gene families in rows (sorted by
# ascending adjusted p-value), samples in columns ordered by Enterococcus
# relative abundance, split by expansion status and by drug class, with the
# binned significance as a right-hand row annotation.
gg_ecoc_card_family_heatmap <-
  ecoc_card_family_tot_0_mat %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column(var = "AMRGeneFamily") %>%
  left_join(ecoc_card_family_tot_0_stats %>% select(AMRGeneFamily, p.adj)) %>%
  arrange(p.adj) %>%
  select(-p.adj) %>%
  column_to_rownames(var = "AMRGeneFamily") %>%
  as.matrix() %>%
  Heatmap(
    name = "RPKM",
    # Discrete colour key, one colour per RPKM bin label.
    col = c(
      "0" = "#f0f4f5",
      "0 - 10" = "#edb4de",
      "10 - 50" = "#db8cc6",
      "50 - 100" = "#c96bb0",
      "100+" = "#9c3380"
    ),
    rect_gp = gpar(col = "black", lwd = 0.2),
    column_names_gp = grid::gpar(fontsize = 8),
    column_gap = unit(6, "mm"),
    column_split = ecoc_card_family_heatmap_labels,
    column_order = ecoc_card_heatmap_column_order,
    column_title_gp = gpar(fontsize = 14),
    column_title_rot = 0,
    cluster_columns = FALSE,
    show_column_names = FALSE,
    show_column_dend = FALSE,
    row_names_gp = gpar(fontsize = 6),
    row_gap = unit(3.5, "mm"),
    row_names_side = c("left"),
    row_order = rownames(ecoc_card_family_heatmap_signif),
    row_split = ecoc_card_family_heatmap_groups$DrugClass2,
    row_title_rot = 0,
    cluster_rows = FALSE,
    show_row_dend = FALSE,
    right_annotation = rowAnnotation(
      df = ecoc_card_family_heatmap_signif,
      show_annotation_name = FALSE,
      col = list(Significance = pcol)
    )
  )
# Display the heatmap. This print and the pdf() call below were fused into a
# single token in the source, which made it a call to an undefined function.
gg_ecoc_card_family_heatmap
# Write the same heatmap to disk.
pdf(file = "./Results/Ecoc_Expan_CARD.pdf", height = 4, width = 12)
gg_ecoc_card_family_heatmap
dev.off()
# Attach the CARD dictionary to every CARD hit (natural join on shared columns).
ebac_card_tot <- left_join(card2, card_dict)
# Column order for the Enterobacterales heatmap: unique sample IDs from
# peri_matrix_all, sorted by ascending Enterobacterales relative abundance.
ebac_card_heatmap_column_order <-
  peri_matrix_all %>%
  ungroup() %>%
  arrange(enterobacterales_rel_abundance) %>%
  distinct(sampleID) %>%
  pull(sampleID)
# Long table of RPKM per sample and AMR gene family, restricted to drug classes
# matching "carbapenem" or "fluoroquinolone" but not "oxazolidinone", with each
# sample tagged "Expansion" / "No Expansion" by comparing Enterobacterales
# relative abundance against the first entry of
# optimal_cutpoint_rel$optimal_cutpoint. The result stays grouped by
# AMRGeneFamily.
ebac_card_family_tot_0 <-
  ebac_card_tot %>%
  filter(
    sampleID %in% ebac_card_heatmap_column_order,
    grepl(DrugClass, pattern = "carbapenem|fluoroquinolone"),
    !grepl(DrugClass, pattern = "oxazolidinone")
  ) %>%
  # One row per sample/family: the row carrying the (group-constant) mean RPKM.
  group_by(sampleID, AMRGeneFamily) %>%
  mutate(rpkm_mean = mean(rpkm)) %>%
  dplyr::slice_max(rpkm_mean, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  # Round-trip through wide format so absent sample/family pairs become 0 and
  # columns that are zero everywhere are dropped.
  pivot_wider(
    id_cols = c(sampleID, rpkm_mean),
    names_from = "AMRGeneFamily",
    values_from = "rpkm",
    values_fill = 0
  ) %>%
  mutate(across(where(is.numeric), ~ replace(.x, is.na(.x), 0))) %>%
  select(where(~ any(. != 0))) %>%
  pivot_longer(
    -c(sampleID, rpkm_mean),
    names_to = "AMRGeneFamily",
    values_to = "rpkm"
  ) %>%
  group_by(sampleID, AMRGeneFamily) %>%
  slice_max(rpkm, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  # Attach the expansion label; right_join keeps every peri_matrix_all row.
  right_join(
    peri_matrix_all %>%
      mutate(
        domination = case_when(
          enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1] ~ "Expansion",
          TRUE ~ "No Expansion"
        )
      ) %>%
      select(sampleID, domination)
  ) %>%
  drop_na(domination, AMRGeneFamily) %>%
  # Keep only families with at least one non-zero RPKM.
  group_by(AMRGeneFamily) %>%
  filter(any(rpkm != 0))
# Per-family Wilcoxon test of RPKM between "No Expansion" and "Expansion",
# followed by Benjamini-Hochberg adjustment. No significance filter is applied
# here; all tested families are kept.
ebac_card_family_tot_0_stats <-
  ebac_card_family_tot_0 %>%
  ungroup() %>%
  drop_na() %>%
  mutate(
    domination = factor(domination, levels = c("No Expansion", "Expansion"))
  ) %>%
  group_by(AMRGeneFamily) %>%
  rstatix::wilcox_test(rpkm ~ domination) %>%
  rstatix::adjust_pvalue(method = "BH")
# Sample x AMR-gene-family table of binned RPKM labels for the Enterobacterales
# heatmap (samples as row names), limited to the families that were tested in
# ebac_card_family_tot_0_stats.
ebac_card_family_tot_0_mat <-
  ebac_card_heatmap_column_order %>%
  as.data.frame() %>%
  dplyr::rename("sampleID" = ".") %>%
  left_join(ebac_card_tot) %>%
  # NOTE(review): every row originates from ebac_card_heatmap_column_order, so
  # the right-hand side of `|` is always TRUE; only the oxazolidinone exclusion
  # actually removes rows.
  filter(
    grepl(DrugClass, pattern = "glycopeptide") |
      sampleID %in% ebac_card_heatmap_column_order,
    !grepl(DrugClass, pattern = "oxazolidinone")
  ) %>%
  group_by(sampleID, AMRGeneFamily) %>%
  mutate(rpkm_mean = mean(rpkm)) %>%
  dplyr::slice_max(rpkm_mean, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  # Bin RPKM into the discrete labels used as heatmap colours. Branch order
  # resolves the shared boundaries; NA RPKM falls through to "100+".
  mutate(
    rpkm = case_when(
      rpkm == 0 ~ "0",
      between(rpkm, 0, 10) ~ "0 - 10",
      between(rpkm, 10, 50) ~ "10 - 50",
      between(rpkm, 50, 100) ~ "50 - 100",
      TRUE ~ "100+"
    )
  ) %>%
  left_join(
    peri_matrix_all %>%
      mutate(
        domination = case_when(
          enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1] ~ "Expansion",
          TRUE ~ "No Expansion"
        )
      ) %>%
      select(sampleID, domination)
  ) %>%
  # Wide/long round trip: missing sample/family combinations become "0".
  pivot_wider(
    id_cols = c(sampleID, domination),
    names_from = "AMRGeneFamily",
    values_from = "rpkm"
  ) %>%
  mutate(across(where(is.character), ~ replace(.x, is.na(.x), "0"))) %>%
  pivot_longer(
    !c(sampleID, domination),
    names_to = "AMRGeneFamily",
    values_to = "rpkm"
  ) %>%
  group_by(sampleID, AMRGeneFamily) %>%
  dplyr::slice_max(domination, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  right_join(ebac_card_family_tot_0_stats %>% select(AMRGeneFamily)) %>%
  pivot_wider(
    id_cols = sampleID,
    names_from = "AMRGeneFamily",
    values_from = "rpkm"
  ) %>%
  column_to_rownames(var = "sampleID")
# Column-split factor for the Enterobacterales heatmap: one "No Expansion" /
# "Expansion" label per row (sample) of ebac_card_family_tot_0_mat.
ebac_card_family_heatmap_labels <-
  ebac_card_family_tot_0_mat %>%
  rownames_to_column(var = "sampleID") %>%
  left_join(
    peri_matrix_all %>%
      mutate(
        domination = case_when(
          enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1] ~ "Expansion",
          TRUE ~ "No Expansion"
        )
      ) %>%
      select(sampleID, domination)
  ) %>%
  # Collapse to a single label per sample.
  group_by(sampleID) %>%
  dplyr::slice_max(domination, n = 1, with_ties = FALSE) %>%
  right_join(
    ebac_card_family_tot_0_mat %>%
      rownames_to_column(var = "sampleID")
  ) %>%
  select(domination) %>%
  mutate(
    domination = factor(domination, levels = c("No Expansion", "Expansion"))
  ) %>%
  pull(domination)
# Row-split lookup for the Enterobacterales heatmap: one row per AMR gene
# family with a simplified drug-class label (DrugClass2) plus the test
# statistics, sorted by ascending adjusted p-value.
ebac_card_family_heatmap_groups <-
  ebac_card_family_tot_0_mat %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column(var = "AMRGeneFamily") %>%
  left_join(ebac_card_family_tot_0_stats %>% select(AMRGeneFamily, p.adj)) %>%
  arrange(p.adj) %>%
  select(-p.adj) %>%
  left_join(card_dict %>% select(AMRGeneFamily, DrugClass)) %>%
  select(AMRGeneFamily, DrugClass) %>%
  # Map the CARD drug class onto the display groups; first match wins. There
  # is no "Linezolid" branch here, unlike the Enterococcus version.
  mutate(
    DrugClass2 = case_when(
      grepl(DrugClass, pattern = "carbapenem|penam") ~ "Carbapenem",
      DrugClass == "glycopeptide antibiotic" ~ "Vancomycin",
      grepl(DrugClass, pattern = "fluoroquinolone") ~ "Fluoroquinolone",
      TRUE ~ "Multi-Drug"
    )
  ) %>%
  # The dictionary join can yield several rows per family; keep the first.
  group_by(AMRGeneFamily) %>%
  dplyr::slice(1) %>%
  left_join(ebac_card_family_tot_0_stats) %>%
  arrange(p.adj)
# Row annotation for the Enterobacterales CARD heatmap: one row per AMR gene
# family (row names), with the BH-adjusted p-value binned into a
# `Significance` factor. Rows are sorted by ascending adjusted p-value.
ebac_card_family_heatmap_signif <-
  ebac_card_family_tot_0_mat %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column(var = "AMRGeneFamily") %>%
  left_join(ebac_card_family_tot_0_stats) %>%
  arrange(p.adj) %>%
  mutate(
    # case_when() returns the first matching branch, so the strictest cutoff
    # must be tested first; otherwise every p.adj <= 0.05 lands in "p < 0.05"
    # and the stricter bins are never reached.
    # NOTE(review): the label "p < 0.0001" is paired with a 0.001 cutoff. The
    # label strings are kept unchanged because the annotation colour vector
    # (`pcol`) is keyed on them -- confirm which value was intended.
    p.adj = case_when(
      p.adj <= 0.001 ~ "p < 0.0001",
      p.adj <= 0.01 ~ "p < 0.01",
      p.adj <= 0.05 ~ "p < 0.05",
      p.adj > 0.05 ~ "NS"
    ),
    p.adj = as.factor(p.adj)
  ) %>%
  select(AMRGeneFamily, Significance = p.adj) %>%
  # Keep only families present in the CARD dictionary (one row per family).
  inner_join(
    card_dict %>%
      select(AMRGeneFamily) %>%
      group_by(AMRGeneFamily) %>%
      dplyr::slice(1)
  ) %>%
  column_to_rownames(var = "AMRGeneFamily")
# Enterobacterales-expansion CARD heatmap: AMR gene families in rows (sorted by
# ascending adjusted p-value), samples in columns ordered by Enterobacterales
# relative abundance, split by expansion status and by drug class, with the
# binned significance as a right-hand row annotation.
gg_ebac_card_family_heatmap <-
  ebac_card_family_tot_0_mat %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column(var = "AMRGeneFamily") %>%
  left_join(ebac_card_family_tot_0_stats %>% select(AMRGeneFamily, p.adj)) %>%
  arrange(p.adj) %>%
  select(-p.adj) %>%
  column_to_rownames(var = "AMRGeneFamily") %>%
  as.matrix() %>%
  Heatmap(
    name = "RPKM",
    # Discrete colour key, one colour per RPKM bin label.
    col = c(
      "0" = "#f0f4f5",
      "0 - 10" = "#edb4de",
      "10 - 50" = "#db8cc6",
      "50 - 100" = "#c96bb0",
      "100+" = "#9c3380"
    ),
    rect_gp = gpar(col = "black", lwd = 0.2),
    column_names_gp = grid::gpar(fontsize = 8),
    column_gap = unit(6, "mm"),
    column_split = ebac_card_family_heatmap_labels,
    column_order = ebac_card_heatmap_column_order,
    column_title_gp = gpar(fontsize = 14),
    column_title_rot = 0,
    cluster_columns = FALSE,
    show_column_names = FALSE,
    show_column_dend = FALSE,
    row_names_gp = gpar(fontsize = 6),
    row_gap = unit(3.5, "mm"),
    row_names_side = c("left"),
    row_order = rownames(ebac_card_family_heatmap_signif),
    row_split = ebac_card_family_heatmap_groups$DrugClass2,
    row_title_rot = 0,
    cluster_rows = FALSE,
    show_row_dend = FALSE,
    right_annotation = rowAnnotation(
      df = ebac_card_family_heatmap_signif,
      show_annotation_name = FALSE,
      col = list(Significance = pcol)
    )
  )
# Display the heatmap. This print and the pdf() call below were fused into a
# single token in the source, which made it a call to an undefined function.
gg_ebac_card_family_heatmap
# Write the same heatmap to disk.
pdf(file = "./Results/Ebac_Expan_CARD.pdf", height = 6, width = 12)
gg_ebac_card_family_heatmap
dev.off()
# One row per "Liver Transplant" sample: its diversity group plus the sampling
# day relative to transplant, looked up in sample_lookup.
sample_days <-
  metaphlan_df_sumry %>%
  group_by(sampleID) %>%
  slice(1) %>%
  filter(db == "Liver Transplant") %>%
  select(sampleID, diversity_group) %>%
  left_join(
    sample_lookup %>%
      select(sampleID, sample_days_from_transplant)
  ) %>%
  ungroup()
# Five-number summary of sampling day (NAs ignored), reshaped to long format
# with one row per statistic: columns `ranges` (name) and `value`.
clin_quartiles <-
  sample_days %>%
  select(sample_days_from_transplant) %>%
  summarise(
    minimum = min(sample_days_from_transplant, na.rm = TRUE),
    lower = unname(
      quantile(sample_days_from_transplant, probs = 0.25, na.rm = TRUE)
    ),
    median = unname(
      quantile(sample_days_from_transplant, probs = 0.5, na.rm = TRUE)
    ),
    upper = unname(
      quantile(sample_days_from_transplant, probs = 0.75, na.rm = TRUE)
    ),
    maximum = max(sample_days_from_transplant, na.rm = TRUE)
  ) %>%
  # Constant helper column so pivot_longer() has something to exclude.
  mutate(placeholder = 1) %>%
  pivot_longer(!placeholder, names_to = "ranges", values_to = "value")
# Histogram of sampling day relative to transplant (1-day bins), with a solid
# line at day 0 and a dashed line plus text label at the median sampling day.
sample_days_histogram <-
  ggplot(sample_days, aes(x = sample_days_from_transplant)) +
  geom_histogram(color = "black", fill = "grey", binwidth = 1) +
  # Transplant date marker.
  geom_vline(
    aes(xintercept = 0),
    color = "orangered",
    linetype = "solid",
    size = 1.2
  ) +
  # Median sampling-day marker.
  geom_vline(
    aes(xintercept = median(sample_days_from_transplant, na.rm = TRUE)),
    color = "cyan2",
    linetype = "dashed",
    size = 1.2
  ) +
  geom_text(
    aes(x = median(sample_days_from_transplant, na.rm = TRUE), y = 12.5),
    color = "cyan2",
    label = paste0(
      "Median = ",
      clin_quartiles %>%
        filter(ranges == "median") %>%
        select(value),
      " days"
    ),
    nudge_x = 4
  ) +
  geom_text(
    label = "Transplant Date = Day 0",
    x = -4,
    y = 12.5,
    color = "orangered"
  ) +
  theme_bw() +
  theme(
    panel.grid = eb(),
    axis.text = et(color = "black", size = 12),
    axis.title = et(color = "black", size = 14),
    legend.position = "none"
  ) +
  scale_x_continuous(breaks = seq(-7, 30, 5), limits = c(-7, 35)) +
  ylab("Number of Samples\n")
# Violin + box plot of sampling day per diversity group, with pairwise
# comparisons between the three diversity groups and the same day-0 / median
# reference lines as the histogram.
sample_days_boxplot <-
  ggplot(
    sample_days,
    aes(
      x = sample_days_from_transplant,
      y = diversity_group,
      fill = diversity_group
    )
  ) +
  geom_vline(
    aes(xintercept = 0),
    color = "orangered",
    linetype = "solid",
    size = 1.2
  ) +
  geom_vline(
    aes(xintercept = median(sample_days_from_transplant, na.rm = TRUE)),
    color = "cyan2",
    linetype = "dashed",
    size = 1.2
  ) +
  geom_violin() +
  geom_boxplot(
    alpha = 0.5,
    width = 0.15,
    outlier.shape = NA,
    fill = "white",
    color = "white"
  ) +
  stat_compare_means(
    comparisons = list(
      c("High Diversity", "Medium Diversity"),
      c("High Diversity", "Low Diversity"),
      c("Medium Diversity", "Low Diversity")
    ),
    bracket.size = 0.3,
    step.increase = 0.07
  ) +
  theme_bw() +
  theme(
    panel.grid = eb(),
    axis.text.x = et(color = "black", size = 12),
    axis.text.y = et(color = "black", size = 12),
    axis.ticks.y = eb(),
    axis.title = et(color = "black", size = 14),
    legend.position = "none"
  ) +
  scale_x_continuous(breaks = seq(-7, 30, 5), limits = c(-7, 35)) +
  scale_fill_manual(values = c("#3A001E", "#8A0246", "#C20463")) +
  ylab("Diversity Groups\n") +
  xlab("\nDays to Sample")
# Stack the histogram over the violin plot and write Supplemental Figure 1.
pdf(
  file = "./Results/Supplemental_Figure_1.pdf",
  height = 10,
  width = 12,
  onefile = FALSE
)
gg.stack(sample_days_histogram, sample_days_boxplot)
dev.off()
# Export every taxon row with more than 90% relative abundance, restricted to
# samples whose ID starts with "lt".
metaphlan_df2 %>%
  left_join(
    metaphlan_df_sumry %>%
      select(sampleID, diversity_group)
  ) %>%
  filter(grepl(x = sampleID, pattern = "^lt")) %>%
  group_by(sampleID) %>%
  filter(pctseqs > 0.9) %>%
  select(-y.text, -db) %>%
  write.csv(., "./Results/90percent_taxon.csv", row.names = FALSE)

# Infection table by diversity group: one row per patient / sample / organism
# category, with `org_presence` ending up as 1 when a bacterial infection is
# present and 0 otherwise. (In the source this assignment was fused onto the
# end of the write.csv() line above, which is a syntax error.)
diversity_infx <-
  peri_criteria_all %>%
  left_join(
    metaphlan_df_sumry %>%
      mutate(patientID = str_extract(string = sampleID, pattern = "lt-[0-9]+")) %>%
      select(patientID, diversity_group)
  ) %>%
  # Per patient and event day keep the row with the largest infx_stool.
  group_by(patientID, eday) %>%
  arrange(-infx_stool) %>%
  dplyr::slice(1) %>%
  ungroup() %>%
  select(
    patientID, sampleID, diversity_group, bact_infection_present,
    infx_stool, organism1, micro1.factor
  ) %>%
  distinct() %>%
  # Normalise organism names (strip whitespace, lower-case); anything outside
  # the listed genera is relabelled "Culture Negative".
  # NOTE(review): the list contains "enterobacterales" but not "enterobacter",
  # and the flags below spell "cloaceae" / "stenotrophmonas" -- confirm these
  # match the spellings actually present in organism1.
  mutate(
    organism1 = gsub(x = organism1, pattern = "\\s+", replacement = ""),
    organism1 = str_to_lower(string = organism1),
    organism1 = ifelse(
      grepl(
        x = organism1,
        pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"
      ),
      organism1,
      "Culture Negative"
    )
  ) %>%
  group_by(patientID, sampleID, infx_stool) %>%
  # One logical flag column per organism of interest.
  mutate(
    `Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium", ignore.case = TRUE),
    `Enterococcus faecalis` = grepl(x = organism1, pattern = "enterococcusfaecalis", ignore.case = TRUE),
    `Enterococcus avium` = grepl(x = organism1, pattern = "enterococcusavium", ignore.case = TRUE),
    `Enterococcus gallinarum` = grepl(x = organism1, pattern = "enterococcusgallinarum", ignore.case = TRUE),
    `Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae", ignore.case = TRUE),
    `Enterobacter cloaceae` = grepl(x = organism1, pattern = "enterobactercloaceae", ignore.case = TRUE),
    `Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli", ignore.case = TRUE),
    `Citrobacter freundii` = grepl(x = organism1, pattern = "citrobacterfreundii", ignore.case = TRUE),
    `Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis", ignore.case = TRUE),
    `Staphylococcus aureus` = grepl(x = organism1, pattern = "staphylococcusaureus", ignore.case = TRUE),
    `Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis", ignore.case = TRUE),
    `Pseudomonas aeruginosa` = grepl(x = organism1, pattern = "pseudomonasaeruginosa", ignore.case = TRUE),
    `Stenotrophmonas maltophilia` = grepl(x = organism1, pattern = "stenotrophmonasmaltophilia", ignore.case = TRUE),
    `Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori", ignore.case = TRUE),
    `Clostridium difficile` = grepl(x = organism1, pattern = "clostridiumdifficile|clostridioidesdifficile", ignore.case = TRUE),
    `Bacteroides sp.` = grepl(x = organism1, pattern = "bacteroides", ignore.case = TRUE),
    `Culture Negative` = grepl(x = organism1, pattern = "Culture Negative", ignore.case = TRUE)
  ) %>%
  pivot_longer(
    -c(patientID:micro1.factor),
    names_to = "organisms",
    values_to = "org_presence"
  ) %>%
  mutate(
    organisms = ifelse(bact_infection_present == "No", "No Bacterial Infection", organisms),
    org_presence = ifelse(org_presence == TRUE, 1, 0)
  ) %>%
  # Keep only the organism categories that were actually flagged.
  group_by(sampleID, infx_stool, bact_infection_present, organisms) %>%
  dplyr::slice_max(org_presence) %>%
  ungroup() %>%
  filter(org_presence == 1) %>%
  group_by(patientID, sampleID, organisms, org_presence) %>%
  dplyr::slice(1) %>%
  # Rows without a stool sample fall back to the patient ID.
  mutate(sampleID = ifelse(is.na(sampleID), patientID, sampleID)) %>%
  mutate(
    org_presence = ifelse(
      grepl(pattern = "No", x = bact_infection_present, ignore.case = TRUE),
      0,
      org_presence
    )
  ) %>%
  ungroup() %>%
  mutate(
    organisms = ifelse(bact_infection_present == "Yes" & org_presence == 0, "Other", organisms)
  ) %>%
  # Collapse everything outside the organisms of interest into "Other Bacterial
  # Infection", then set the final group factor and presence flag.
  mutate(
    organisms = ifelse(
      grepl(
        x = organisms,
        pattern = "enterococcus|klebsiella|escherichia|proteus|citrobacter|culture",
        ignore.case = TRUE
      ),
      organisms,
      "Other Bacterial Infection"
    ),
    organisms = ifelse(bact_infection_present == "No", "No Bacterial Infection", organisms),
    diversity_group = as.character(diversity_group),
    diversity_group = factor(
      diversity_group,
      levels = c("Low Diversity", "Medium Diversity", "High Diversity")
    ),
    org_presence = ifelse(
      grepl(pattern = "No", x = bact_infection_present, ignore.case = TRUE),
      0,
      1
    )
  )
# Pairwise proportion tests (BH-adjusted) comparing the share of rows with a
# bacterial infection between diversity groups, with p-values formatted as
# display strings.
diversity_infx_stats <-
  diversity_infx %>%
  ungroup() %>%
  group_by(diversity_group) %>%
  summarise(
    # Per-group row count. The previous `nrow(.)` evaluated to the row count of
    # the whole piped table for every group, so the "no infection" counts were
    # computed against the overall total, not the group total.
    n = dplyr::n(),
    org_presence_1 = sum(org_presence == 1),
    org_presence_0 = n - org_presence_1
  ) %>%
  select(-n) %>%
  # Groups become row names; the two remaining columns are the counts with and
  # without infection.
  column_to_rownames(var = "diversity_group") %>%
  rstatix::pairwise_prop_test(p.adjust.method = "BH") %>%
  mutate(
    p = ifelse(p < 0.001, "p < 0.001", paste("p = ", round(p, 2))),
    p.adj = ifelse(
      p.adj < 0.001,
      "p.adj < 0.001",
      paste("p.adj = ", round(p.adj, 2))
    )
  )
# Stacked bar chart of infections per diversity group, filled by infecting
# organism, with the pairwise adjusted p-values embedded as a table in the
# top-right corner.
gg_diversity_infx <-
  diversity_infx %>%
  mutate(
    diversity_group = as.character(diversity_group),
    diversity_group = ifelse(is.na(diversity_group), "No Stool Sample", diversity_group),
    diversity_group = factor(
      diversity_group,
      levels = c("Low Diversity", "Medium Diversity", "High Diversity", "No Stool Sample")
    ),
    organisms = ifelse(grepl(organisms, pattern = "gallinarum"), "Other Bacterial Infection", organisms),
    # Numeric colour key per organism; 0 marks rows that are dropped below.
    org_colors = case_when(
      grepl(x = organisms, pattern = "enterococcus faecium", ignore.case = TRUE) ~ 1,
      grepl(x = organisms, pattern = "enterococcus faecalis", ignore.case = TRUE) ~ 2,
      grepl(x = organisms, pattern = "enterococcus avium", ignore.case = TRUE) ~ 3,
      grepl(x = organisms, pattern = "klebsiella pneumoniae", ignore.case = TRUE) ~ 4,
      grepl(x = organisms, pattern = "escherichia coli", ignore.case = TRUE) ~ 5,
      grepl(x = organisms, pattern = "proteus mirabilis", ignore.case = TRUE) ~ 6,
      grepl(x = organisms, pattern = "citrobacter freundii", ignore.case = TRUE) ~ 7,
      grepl(x = organisms, pattern = "other bacterial infection|gallinarum", ignore.case = TRUE) ~ 8,
      grepl(x = organisms, pattern = "culture negative", ignore.case = TRUE) ~ 9,
      grepl(x = diversity_group, pattern = "no stool sample", ignore.case = TRUE) ~ 10,
      TRUE ~ 0
    ),
    organisms = as.factor(organisms),
    organisms = factor(
      organisms,
      levels = c(
        "Enterococcus faecium",
        "Enterococcus faecalis",
        "Enterococcus avium",
        "Klebsiella pneumoniae",
        "Escherichia coli",
        "Proteus mirabilis",
        "Citrobacter freundii",
        "Other Bacterial Infection",
        "Culture Negative",
        "No Bacterial Infection",
        "No Stool Sample"
      )
    ),
    org_colors = factor(
      org_colors,
      levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "0")
    )
  ) %>%
  filter(org_colors != "0") %>%
  ggplot(aes(x = diversity_group, y = org_presence, fill = as.factor(org_colors))) +
  geom_col(color = "black", size = 0.3) +
  theme_bw() +
  theme(
    axis.text = et(size = 12, color = "black"),
    axis.title.x = eb(),
    axis.title.y = et(size = 14, color = "black"),
    panel.grid = eb(),
    panel.border = eb(),
    axis.line = el(color = "black")
  ) +
  # Inset table of pairwise adjusted p-values. The two group columns get blank
  # headers of different widths (one and two spaces): select() rejects
  # duplicated output names, so both cannot be the same single space.
  # strrep() builds them so the distinction survives whitespace normalisation.
  geom_table_npc(
    data = diversity_infx_stats %>%
      select(group1, group2, p.adj) %>%
      mutate(p.adj = gsub(pattern = "p.adj = ", replacement = "", p.adj)) %>%
      stats::setNames(c(strrep(" ", 1:2), "p.adj")),
    label = list(
      diversity_infx_stats %>%
        select(group1, group2, p.adj) %>%
        mutate(p.adj = gsub(pattern = "p.adj = ", replacement = "", p.adj)) %>%
        stats::setNames(c(strrep(" ", 1:2), "p.adj"))
    ),
    npcx = 0.975,
    npcy = 0.975
  ) +
  # Colours and legend labels are positional: entry k belongs to org_colors k.
  scale_fill_manual(
    values = c(
      "#129246",
      "#0C7A3A",
      "#055C2B",
      "#FF0000",
      "#CC0404",
      "#8A0202",
      "#5C0202",
      "#E6C66E",
      "#BD992D",
      "gray85"
    ),
    labels = c(
      "Enterococcus faecium",
      "Enterococcus faecalis",
      "Enterococcus avium",
      "Klebsiella pneumoniae",
      "Escherichia coli",
      "Proteus mirabilis",
      "Citrobacter freundii",
      "Other Bacterial Infection",
      "Culture Negative",
      "No Stool Sample"
    )
  ) +
  labs(
    fill = "Infecting Organism",
    y = "Number of Infections\n(Including Reinfections)"
  ) +
  scale_y_continuous(expand = expansion(add = c(0.25)))
gg_diversity_infx
# First samples lookup
vc_first_samps <- readRDS("./Data/validation_cohort_first_samps.rds")

# Lookup covering all validation-cohort samples
vc_all_samps <- readRDS("./Data/validation_cohort_all_samps.rds")

# Peri-transplant criteria for all samples
vc_peri_criteria_all <- readRDS("./Data/validation_cohort_peri_criteria_all_anon.rds")

# Antibiotic exposure
vc_abx <- readRDS("./Data/validation_cohort_abx.rds")

# Demographics, with two race labels shortened
vc_demo <-
  readRDS("./Data/validation_cohort_demo.rds") %>%
  mutate(
    race = recode(
      race,
      `Black or African-American` = "Black/African-American",
      `Unknown or Patient unable to respond` = "Unknown"
    )
  )

# MetaPhlAn taxonomic profiles
vc_metaphlan_df <- readRDS("./Data/validation_cohort_metaphlan.rds")

# MetaPhlAn profiles for the peri-transplant samples
vc_metaphlan_peri <- readRDS("./Data/validation_cohort_metahplan_peri.rds")
# Qualitative metabolomics
vc_metab_qual <- readRDS("./Data/validation_cohort_metab_qual.rds")

# Diversity-group cutoffs carried over from metaphlan_df_sumry: the highest
# Shannon value observed in each non-donor diversity group. (In the source this
# assignment was fused onto the end of the readRDS() line above, which is a
# syntax error.)
vc_diversity_thresh <-
  metaphlan_df_sumry %>%
  filter(diversity_group != "Healthy Donor") %>%
  select(diversity_group, Shannon) %>%
  group_by(diversity_group) %>%
  dplyr::slice_max(Shannon, with_ties = FALSE)
# Validation-cohort taxon table for the first samples: filtered on minimum
# relative abundance and renormalised so pctseqs sums to 1 within each sample.
# The result stays grouped by sampleID.
vc_t_metaphlan <-
  vc_metaphlan_df %>%
  # Two first-sample IDs are remapped to their "-01" counterparts before joining.
  right_join(
    vc_first_samps %>%
      mutate(
        sampleID = recode(
          sampleID,
          `vc-011-02` = "vc-011-01",
          `vc-012-02` = "vc-012-01"
        )
      )
  ) %>%
  mutate(db = "Liver Transplant") %>%
  select(sampleID, taxid, db, pctseqs, Total) %>%
  # De-duplicate identical sample / taxon / abundance rows.
  group_by(sampleID, taxid, pctseqs) %>%
  slice(1) %>%
  ungroup() %>%
  filter(pctseqs >= 1e-04) %>%
  # NOTE(review): these counts are computed inside per-sample groups, so
  # sampleID_count is 1 for every row and spPres can never be below 1; the
  # `spPres >= 0.1` filter therefore removes nothing. Confirm whether a
  # cross-sample prevalence filter was intended.
  group_by(sampleID) %>%
  dplyr::add_count(taxid, name = "totalSp") %>%
  mutate(
    sampleID_count = length(unique(sampleID)),
    spPres = totalSp / sampleID_count
  ) %>%
  filter(spPres >= 0.1) %>%
  select(-c(Total, sampleID_count, spPres, totalSp)) %>%
  group_by(sampleID) %>%
  mutate(pctseqs = pctseqs / sum(pctseqs))
# Wide sample x taxon table of relative abundances (samples as row names,
# absent taxa filled with 0).
vc_t_metaphlan_mat <-
  vc_t_metaphlan %>%
  distinct() %>%
  pivot_wider(
    names_from = "taxid",
    values_from = "pctseqs",
    values_fill = 0
  ) %>%
  column_to_rownames(var = "sampleID") %>%
  select(-db)
# Shannon alpha diversity per validation-cohort sample, as a two-column data
# frame (sampleID, Shannon).
vc_alpha_shannon <-
  vegan::diversity(vc_t_metaphlan_mat, index = "shannon") %>%
  as.data.frame() %>%
  rownames_to_column(var = "sampleID") %>%
  dplyr::rename(Shannon = ".")
#### MetaPhlAn4 relative abundance with taxonomy (validation cohort) ####
# Adds taxonomy and Shannon diversity to the taxon table, builds a composite
# Phylum-Order-Family-Genus label, and computes `y.text`: the midpoint of each
# taxon's slice of the cumulative abundance within its sample.
vc_metaphlan_df2 <-
  vc_t_metaphlan %>%
  mutate(db = factor(db, levels = c("Liver Transplant"))) %>%
  left_join(tax_lookup) %>%
  drop_na(taxid) %>%
  arrange(Kingdom, Phylum, Class, Order, Family, Genus) %>%
  mutate(Genus = paste0(Phylum, "-", Order, "-", Family, "-", Genus)) %>%
  left_join(vc_alpha_shannon) %>%
  group_by(sampleID) %>%
  arrange(Genus) %>%
  mutate(
    cum.pct = cumsum(pctseqs),
    # Mean of the current and previous cumulative values.
    y.text = (cum.pct + c(0, cum.pct[-length(cum.pct)])) / 2
  ) %>%
  ungroup() %>%
  dplyr::select(-cum.pct)
# Taxon colour palette for the validation-cohort abundance plot.
vc_metaphlan_pal <- getRdpPal(vc_metaphlan_df2)

# Stacked relative-abundance bars per sample, ordered by Shannon diversity and
# faceted by diversity group. Groups are assigned by comparing each sample's
# Shannon value against the cutoffs in vc_diversity_thresh.
gg_vc_metaphlan <-
  vc_metaphlan_df2 %>%
  right_join(
    vc_first_samps %>%
      mutate(
        sampleID = recode(
          sampleID,
          `vc-011-02` = "vc-011-01",
          `vc-012-02` = "vc-012-01"
        )
      )
  ) %>%
  mutate(
    diversity_group = case_when(
      Shannon <= vc_diversity_thresh$Shannon[vc_diversity_thresh$diversity_group == "Low Diversity"] ~
        "Low Diversity",
      Shannon > vc_diversity_thresh$Shannon[vc_diversity_thresh$diversity_group == "Low Diversity"] &
        Shannon <= vc_diversity_thresh$Shannon[vc_diversity_thresh$diversity_group == "Medium Diversity"] ~
        "Medium Diversity",
      Shannon > vc_diversity_thresh$Shannon[vc_diversity_thresh$diversity_group == "Medium Diversity"] ~
        "High Diversity"
    )
  ) %>%
  drop_na(diversity_group) %>%
  mutate(
    diversity_group = factor(
      diversity_group,
      levels = c("Low Diversity", "Medium Diversity", "High Diversity")
    )
  ) %>%
  ungroup() %>%
  # Freeze the current Genus order as factor levels so stacking is stable.
  mutate(Genus = factor(Genus, levels = unique(Genus))) %>%
  group_by(sampleID) %>%
  arrange(Genus) %>%
  ggplot(aes(x = reorder(sampleID, Shannon), y = pctseqs)) +
  geom_bar(stat = "identity", aes(fill = Genus), width = 0.9) +
  scale_fill_manual(values = vc_metaphlan_pal) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = eb(),
    axis.ticks.x = eb(),
    strip.text.x = et(angle = 0, size = 14),
    strip.background = eb(),
    axis.title.y = et(color = "black", size = 14),
    axis.text.y = et(color = "black", size = 12),
    panel.spacing = unit(0.5, "lines"),
    plot.margin = margin(t = 5, r = 5, b = 0, l = 5)
  ) +
  facet_grid(. ~ diversity_group, scales = "free", space = "free") +
  scale_y_continuous(
    expand = expansion(mult = c(0.005, 0.005)),
    labels = scales::percent_format(accuracy = 1)
  ) +
  ylab("MetaPhlAn4 Relative Abundance") +
  xlab("")
# Colour each facet strip title with its diversity-group colour by editing the
# built gtable directly.
gg_vc_metaphlan_grob <- ggplot_gtable(ggplot_build(gg_vc_metaphlan))
# Indices of the strip grobs in the gtable layout.
vc_strip_both <- which(grepl("strip-", gg_vc_metaphlan_grob$layout$name))
# One colour per facet, taken from the first three diversity-group colours.
vc_fills <- diversity_group_colors[c(1:3)]
vc_k <- 1
for (i in vc_strip_both) {
  # Locate the title grob inside this strip.
  vc_l <- which(grepl("titleGrob", gg_vc_metaphlan_grob$grobs[[i]]$grobs[[1]]$childrenOrder))
  # Use the validation-cohort colours defined above. The loop previously
  # indexed `fills`, a variable not defined in this section, which left
  # `vc_fills` unused.
  gg_vc_metaphlan_grob$grobs[[i]]$grobs[[1]]$children[[vc_l]]$children[[1]]$gp$col <- vc_fills[vc_k]
  vc_k <- vc_k + 1
}
# Shannon diversity bar per sample, ordered by Shannon and faceted / coloured
# by diversity group, using the same cutoffs as the abundance plot.
gg_vc_shannon <-
  vc_alpha_shannon %>%
  mutate(
    diversity_group = case_when(
      Shannon <= vc_diversity_thresh$Shannon[vc_diversity_thresh$diversity_group == "Low Diversity"] ~
        "Low Diversity",
      Shannon > vc_diversity_thresh$Shannon[vc_diversity_thresh$diversity_group == "Low Diversity"] &
        Shannon <= vc_diversity_thresh$Shannon[vc_diversity_thresh$diversity_group == "Medium Diversity"] ~
        "Medium Diversity",
      Shannon > vc_diversity_thresh$Shannon[vc_diversity_thresh$diversity_group == "Medium Diversity"] ~
        "High Diversity"
    )
  ) %>%
  drop_na(diversity_group) %>%
  mutate(
    diversity_group = factor(
      diversity_group,
      levels = c("Low Diversity", "Medium Diversity", "High Diversity")
    )
  ) %>%
  ggplot(aes(x = reorder(sampleID, Shannon), y = Shannon)) +
  geom_bar(stat = "identity", aes(fill = diversity_group), width = 0.9) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = eb(),
    axis.title.x = eb(),
    axis.ticks.x = eb(),
    strip.text = eb(),
    strip.background = er(fill = "white"),
    axis.title.y = et(color = "black", size = 14),
    axis.text.y = et(color = "black", size = 12),
    panel.spacing = unit(0.5, "lines"),
    plot.margin = margin(t = 0, r = 5, b = 0, l = 5),
    panel.grid = eb()
  ) +
  scale_fill_manual(values = diversity_group_colors[c(1:3)]) +
  facet_grid(. ~ diversity_group, scales = "free", space = "free")
# Stack the abundance plot over the Shannon bars and write the figure to PDF.
gg_vc_metaphlan_shannon <- plot_grid(
  gg_vc_metaphlan_grob,
  gg_vc_shannon,
  axis = "lb",
  align = "hv",
  nrow = 2,
  rel_heights = c(1, 0.15)
)
pdf(
  file = "./Results/VC_Metaphlan_Diversity.pdf",
  width = 12.25,
  height = 8
)
gg_vc_metaphlan_shannon
invisible(dev.off())
# Re-train/tune original model on ALL original data:
# Hyperparameter tuning, step 1: fit an initial 5-component model to decide how
# many components are worth keeping.
tot_diversity_train_splsda <- mixOmics::splsda(
  diversity_metab_mat,
  diversity_metab_labs,
  ncomp = 5
)
# Assess performance with 5-fold cross-validation repeated 50 times.
set.seed(1234)
tot_diversity_train_plsda_perf <- perf(
  tot_diversity_train_splsda,
  validation = "Mfold",
  folds = 5,
  progressBar = FALSE,
  auc = TRUE,
  nrepeat = 50
)
# ncomp = 4 might be best for classification error rate and max.dist
plot(
  tot_diversity_train_plsda_perf,
  col = color.mixo(5:7),
  sd = FALSE,
  auc = TRUE,
  legend.position = "horizontal"
)
# Number of optimal variables to select for each component
# Candidate numbers of variables (keepX) to test on each component.
tot_diversity_train_keepX <- c(1:10, seq(20, 130, 10))

# Tune keepX with 5-fold cross-validation repeated 50 times, scored by
# balanced error rate (BER) using the max.dist prediction distance.
set.seed(123)
tot_diversity_train_tune_splsda <- mixOmics::tune.splsda(
  diversity_metab_mat,
  diversity_metab_labs,
  ncomp = 4, # Choose 4 components (max) to be safe
  validation = "Mfold",
  folds = 5,
  dist = "max.dist",
  progressBar = FALSE,
  auc = TRUE,
  measure = "BER",
  test.keepX = tot_diversity_train_keepX,
  nrepeat = 50
)
plot(tot_diversity_train_tune_splsda, col = color.jet(4))

tot_diversity_error <- tot_diversity_train_tune_splsda$error.rate

# Optimal number of components based on t-tests on the error rate.
tot_diversity_ncomp <- tot_diversity_train_tune_splsda$choice.ncomp$ncomp
tot_diversity_ncomp # 1 component is optimal
## [1] 1

# Optimal number of variables to select per component. At least two
# components are always kept, even when tuning selects a single one.
tot_diversity_select_keepX <-
  tot_diversity_train_tune_splsda$choice.keepX[
    seq_len(max(tot_diversity_ncomp, 2))
  ]
tot_diversity_select_keepX
## comp1 comp2
## 70 1

# Final Model
tot_diversity_splsda_final <- mixOmics::splsda(
  diversity_metab_mat,
  diversity_metab_labs,
  ncomp = max(tot_diversity_ncomp, 2),
  keepX = tot_diversity_select_keepX
)

# Needed for downstream excel file for component loadings
tot_diversity_splsda_final_copy <- tot_diversity_splsda_final
# Build data matrix: one row per sample, one column per compound, restricted
# to the columns of the training matrix `diversity_metab_mat`.
vc_diversity_metab_mat <-
  vc_metab_qual %>%
  ungroup() %>%
  select(patientID, sampleID, compound, mvalue) %>%
  # Keep only first samples; two sample IDs are recoded to their "-01" form.
  inner_join(
    vc_first_samps %>%
      mutate(
        sampleID = recode(
          sampleID,
          `vc-011-02` = "vc-011-01",
          `vc-012-02` = "vc-012-01"
        )
      )
  ) %>%
  mutate(compound = str_to_title(compound)) %>%
  filter(compound %in% heatmap_cmpds$compound | is.na(compound)) %>%
  ungroup() %>%
  # Attach the diversity label derived from the Shannon thresholds.
  left_join(
    vc_metaphlan_df2 %>%
      right_join(
        vc_first_samps %>%
          mutate(
            sampleID = recode(
              sampleID,
              `vc-011-02` = "vc-011-01",
              `vc-012-02` = "vc-012-01"
            )
          )
      ) %>%
      mutate(
        diversity_group = case_when(
          Shannon <= vc_diversity_thresh$Shannon[vc_diversity_thresh$diversity_group == "Low Diversity"] ~
            "Low Diversity",
          Shannon > vc_diversity_thresh$Shannon[vc_diversity_thresh$diversity_group == "Low Diversity"] &
            Shannon <= vc_diversity_thresh$Shannon[vc_diversity_thresh$diversity_group == "Medium Diversity"] ~
            "Medium Diversity",
          Shannon > vc_diversity_thresh$Shannon[vc_diversity_thresh$diversity_group == "Medium Diversity"] ~
            "High Diversity"
        ),
        diversity_group_abv = recode(
          diversity_group,
          `Low Diversity` = "Low",
          `Medium Diversity` = "Medium",
          `High Diversity` = "High"
        )
      ) %>%
      drop_na(diversity_group, diversity_group_abv) %>%
      mutate(
        # diversity_group_abv is intentionally left as character here.
        diversity_group = factor(
          diversity_group,
          levels = c("Low Diversity", "Medium Diversity", "High Diversity")
        )
      ) %>%
      ungroup() %>%
      distinct(sampleID, diversity_group_abv)
  ) %>%
  group_by(sampleID, compound, diversity_group_abv) %>%
  # Averaging step is disabled:
  # summarise(mvalue = mean(mvalue, na.rm = TRUE)) %>%
  ungroup() %>%
  # Normalise NaN to NA in every column.
  mutate_all(~ replace(., is.nan(.), NA)) %>%
  select(sampleID, compound, mvalue, diversity_group_abv) %>%
  drop_na(sampleID) %>%
  pivot_wider(names_from = "compound", values_from = "mvalue") %>%
  # Zero-filling of missing values is disabled:
  # mutate_all(~replace(., is.na(.), 0)) %>%
  filter(sampleID != "") %>%
  column_to_rownames(var = "sampleID") %>%
  select(-diversity_group_abv) %>%
  # Same columns, in the same order, as the training matrix.
  select(names(diversity_metab_mat)) %>%
  # Drop rows in which every compound is missing.
  filter_all(any_vars(!is.na(.)))
# Build labels: diversity class ("Low"/"Medium"/"High") for each row of
# `vc_diversity_metab_mat`, in row order.
vc_diversity_metab_labs <-
  vc_diversity_metab_mat %>%
  rownames_to_column(var = "sampleID") %>%
  left_join(
    vc_metaphlan_df2 %>%
      right_join(
        vc_first_samps %>%
          mutate(
            sampleID = recode(
              sampleID,
              `vc-011-02` = "vc-011-01",
              `vc-012-02` = "vc-012-01"
            )
          )
      ) %>%
      mutate(
        diversity_group = case_when(
          Shannon <= vc_diversity_thresh$Shannon[vc_diversity_thresh$diversity_group == "Low Diversity"] ~
            "Low Diversity",
          Shannon > vc_diversity_thresh$Shannon[vc_diversity_thresh$diversity_group == "Low Diversity"] &
            Shannon <= vc_diversity_thresh$Shannon[vc_diversity_thresh$diversity_group == "Medium Diversity"] ~
            "Medium Diversity",
          Shannon > vc_diversity_thresh$Shannon[vc_diversity_thresh$diversity_group == "Medium Diversity"] ~
            "High Diversity"
        ),
        diversity_group_abv = recode(
          diversity_group,
          `Low Diversity` = "Low",
          `Medium Diversity` = "Medium",
          `High Diversity` = "High"
        )
      ) %>%
      drop_na(diversity_group, diversity_group_abv) %>%
      mutate(
        # Only the abbreviated label is converted to an ordered set of levels;
        # the long-form diversity_group is left as character.
        diversity_group_abv = factor(
          diversity_group_abv,
          levels = c("Low", "Medium", "High")
        )
      ) %>%
      ungroup() %>%
      distinct(sampleID, diversity_group_abv)
  ) %>%
  droplevels() %>%
  pull(diversity_group_abv)

dim(vc_diversity_metab_mat) # 35 93 (means 93 compounds and 35 LT patients)
## [1] 35 93
## [1] 35
# Predict using model
vc_diversity_tot <- predict(
  tot_diversity_splsda_final,
  vc_diversity_metab_mat,
  dist = "all"
)

# Predicted class under max.dist, taken from column `tot_diversity_ncomp`.
# NOTE(review): the final model is fitted with at least two components, while
# this reads the column for the tuned ncomp -- confirm this is intended.
vc_diversity_tot_predict <- factor(
  vc_diversity_tot$class$max.dist[, tot_diversity_ncomp],
  levels = c("Low", "Medium", "High")
)
vc_diversity_tot_union <- factor(
  union(vc_diversity_tot_predict, vc_diversity_metab_labs),
  levels = c("Low", "Medium", "High")
)

# Confusion matrix: rows are predictions, columns are the true labels.
vc_diversity_cm <- confusionMatrix(
  table(vc_diversity_tot_predict, vc_diversity_metab_labs)
)
vc_diversity_cm
## Confusion Matrix and Statistics
##
## vc_diversity_metab_labs
## vc_diversity_tot_predict Low Medium High
## Low 16 10 2
## Medium 0 2 1
## High 0 1 3
##
## Overall Statistics
##
## Accuracy : 0.6
## 95% CI : (0.4211, 0.7613)
## No Information Rate : 0.4571
## P-Value [Acc > NIR] : 0.063603
##
## Kappa : 0.3137
##
## Mcnemar's Test P-Value : 0.007383
##
## Statistics by Class:
##
## Class: Low Class: Medium Class: High
## Sensitivity 1.0000 0.15385 0.50000
## Specificity 0.3684 0.95455 0.96552
## Pos Pred Value 0.5714 0.66667 0.75000
## Neg Pred Value 1.0000 0.65625 0.90323
## Prevalence 0.4571 0.37143 0.17143
## Detection Rate 0.4571 0.05714 0.08571
## Detection Prevalence 0.8000 0.08571 0.11429
## Balanced Accuracy 0.6842 0.55420 0.73276
# Additional model measures
vc_diversity_epi <- mltest::ml_test(
  predicted = factor(vc_diversity_tot_predict, levels = c("Low", "Medium", "High")),
  true = factor(vc_diversity_metab_labs, levels = c("Low", "Medium", "High"))
)

# Relabel the confusion table for display, then transpose it so that rows are
# the actual classes and columns the predicted classes.
vc_diversity_cm_names <- vc_diversity_cm$table
colnames(vc_diversity_cm_names) <- paste0("Actual\n", c("Low", "Medium", "High"))
rownames(vc_diversity_cm_names) <- paste0("Predicted\n", c("Low", "Medium", "High"))
vc_diversity_confusion_df <- t(vc_diversity_cm_names)
vc_diversity_confusion_df
## vc_diversity_tot_predict
## vc_diversity_metab_labs Predicted\nLow Predicted\nMedium Predicted\nHigh
## Actual\nLow 16 0 0
## Actual\nMedium 10 2 1
## Actual\nHigh 2 1 3
# multiclass 95% CI
vc_diversity_mc <- biostatUtil::multiClassCM(
  factor(vc_diversity_metab_labs, levels = c("Low", "Medium", "High")),
  factor(vc_diversity_tot_predict, levels = c("Low", "Medium", "High")),
  seed = 20,
  num.boot = 1000,
  conf.level = 0.95,
  digits = 2,
  method = "wilson"
)

# Split each per-class cell on spaces into point estimate, lower bound,
# separator and upper bound; keep estimate and bounds, strip the parentheses,
# and convert everything to numeric.
# `across()` replaces the deprecated `funs()` / `mutate_all()` / `mutate_if()`.
vc_diversity_mc_table <- vc_diversity_mc$table %>%
  as.data.frame() %>%
  separate(Low, into = c("Low_Avg", "Low_Lower", "Low_Sep", "Low_Upper"), sep = " ") %>%
  separate(Medium, into = c("Med_Avg", "Med_Lower", "Med_Sep", "Med_Upper"), sep = " ") %>%
  separate(High, into = c("High_Avg", "High_Lower", "High_Sep", "High_Upper"), sep = " ") %>%
  select(
    Average,
    Low_Avg, Low_Lower, Low_Upper,
    Med_Avg, Med_Lower, Med_Upper,
    High_Avg, High_Lower, High_Upper
  ) %>%
  mutate(across(everything(), ~ str_replace(.x, "\\(|\\)", ""))) %>%
  mutate(across(where(is.character), as.numeric))

# Train the model to tune hyperparameters
# Initial 5-component fit, used to decide how many components to keep.
tot_ecoc_doms_train_splsda <- mixOmics::splsda(
  ecoc_doms_metab_mat,
  ecoc_doms_metab_labs,
  ncomp = 5
)

# Performance assessment: 5-fold cross-validation repeated 50 times.
set.seed(1234)
tot_ecoc_doms_train_plsda_perf <- perf(
  tot_ecoc_doms_train_splsda,
  validation = "Mfold",
  folds = 5,
  nrepeat = 50,
  auc = TRUE,
  progressBar = FALSE
)

# Plot the cross-validation performance (standard-deviation bars disabled).
plot(
  tot_ecoc_doms_train_plsda_perf,
  col = color.mixo(5:7),
  sd = FALSE,
  auc = TRUE,
  legend.position = "horizontal"
)
# ncomp = 1 or 4 is best for classification error rate and max.dist

# Number of optimal variables to select for each component
# Candidate numbers of variables (keepX) to test on each component.
tot_ecoc_doms_train_keepX <- c(1:10, seq(20, 108, 10))

# Tune keepX with 5-fold cross-validation repeated 50 times, scored by
# balanced error rate (BER) using the max.dist prediction distance.
set.seed(123)
tot_ecoc_doms_train_tune_splsda <- mixOmics::tune.splsda(
  ecoc_doms_metab_mat,
  ecoc_doms_metab_labs,
  ncomp = 4, # Choose 4 components (max) to be safe
  validation = "Mfold",
  folds = 5,
  dist = "max.dist",
  progressBar = FALSE,
  auc = TRUE,
  measure = "BER",
  test.keepX = tot_ecoc_doms_train_keepX,
  nrepeat = 50
)
plot(tot_ecoc_doms_train_tune_splsda, col = color.jet(4))

tot_ecoc_doms_train_error <- tot_ecoc_doms_train_tune_splsda$error.rate

# Optimal number of components based on t-tests on the error rate.
tot_ecoc_doms_train_ncomp <- tot_ecoc_doms_train_tune_splsda$choice.ncomp$ncomp
# Disabled manual override:
# tot_ecoc_doms_train_ncomp = 4 # 4 components are optimal via visual inspection

# Optimal number of variables to select per component. At least two
# components are always kept, even when tuning selects a single one.
tot_ecoc_doms_train_select_keepX <-
  tot_ecoc_doms_train_tune_splsda$choice.keepX[
    seq_len(max(tot_ecoc_doms_train_ncomp, 2))
  ]
tot_ecoc_doms_train_select_keepX
## comp1 comp2
## 90 3

# Final Model
tot_ecoc_doms_train_splsda_final <- mixOmics::splsda(
  ecoc_doms_metab_mat,
  ecoc_doms_metab_labs,
  ncomp = max(tot_ecoc_doms_train_ncomp, 2),
  keepX = tot_ecoc_doms_train_select_keepX
)

# Needed for downstream excel file for component loadings
tot_ecoc_doms_train_splsda_final_copy <- tot_ecoc_doms_train_splsda_final
# Per-sample Enterococcus relative abundance (sum of `pctseqs` over rows whose
# Genus matches "Enterococcus") and the resulting expansion label.
vc_ecoc <- vc_metaphlan_df2 %>%
  right_join(
    vc_first_samps %>%
      mutate(
        sampleID = recode(
          sampleID,
          `vc-011-02` = "vc-011-01",
          `vc-012-02` = "vc-012-01"
        )
      )
  ) %>%
  group_by(sampleID) %>%
  filter(grepl(x = Genus, pattern = "Enterococcus", ignore.case = TRUE)) %>%
  count(sampleID, wt = pctseqs, name = "enterococcus_rel_abundance") %>%
  # Bring back first samples that had no matching rows at all.
  full_join(
    vc_first_samps %>%
      ungroup() %>%
      mutate(
        sampleID = recode(
          sampleID,
          `vc-011-02` = "vc-011-01",
          `vc-012-02` = "vc-012-01"
        )
      ) %>%
      select(sampleID)
  ) %>%
  mutate(
    # Samples without any matching rows get an abundance of 0.
    enterococcus_rel_abundance = ifelse(
      is.na(enterococcus_rel_abundance),
      0,
      enterococcus_rel_abundance
    ),
    # The second entry of `optimal_cutpoint_rel$optimal_cutpoint` is the
    # threshold used for this label.
    domination = case_when(
      enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2] ~ "Expansion",
      TRUE ~ "No Expansion"
    )
  ) %>%
  drop_na(domination) %>%
  ungroup() %>%
  distinct(sampleID, domination)
# Sample-by-compound matrix, restricted to the columns of the training matrix
# `ecoc_doms_metab_mat`.
vc_ecoc_doms_metab_mat <-
  vc_metab_qual %>%
  ungroup() %>%
  select(patientID, sampleID, compound, mvalue) %>%
  inner_join(
    vc_first_samps %>%
      mutate(
        sampleID = recode(
          sampleID,
          `vc-011-02` = "vc-011-01",
          `vc-012-02` = "vc-012-01"
        )
      )
  ) %>%
  mutate(compound = str_to_title(compound)) %>%
  filter(compound %in% heatmap_cmpds$compound | is.na(compound)) %>%
  ungroup() %>%
  left_join(vc_ecoc) %>%
  group_by(sampleID, compound, domination) %>%
  # Averaging step is disabled:
  # summarise(mvalue = mean(mvalue, na.rm = TRUE)) %>%
  ungroup() %>%
  # Normalise NaN to NA in every column.
  mutate_all(~ replace(., is.nan(.), NA)) %>%
  select(sampleID, compound, mvalue, domination) %>%
  drop_na(sampleID) %>%
  pivot_wider(names_from = "compound", values_from = "mvalue") %>%
  # Zero-filling of missing values is disabled:
  # mutate_all(~replace(., is.na(.), 0)) %>%
  filter(sampleID != "") %>%
  filter_all(any_vars(!is.na(.))) %>%
  select(-domination) %>%
  column_to_rownames(var = "sampleID") %>%
  # Select the training-matrix columns; the training name
  # "Omega-Muricholic Acid" is mapped to this cohort's column name first.
  select(
    names(ecoc_doms_metab_mat) %>%
      as.data.frame() %>%
      dplyr::rename(compound = ".") %>%
      mutate(
        compound = recode(
          compound,
          `Omega-Muricholic Acid` = "3-Epicholic Acid Or Omega-Muricholic Acid"
        )
      ) %>%
      pull(compound)
  ) %>%
  # Drop rows in which every compound is missing.
  filter_all(any_vars(!is.na(.)))
# Expansion label for each row of `vc_ecoc_doms_metab_mat`, in row order.
vc_ecoc_doms_metab_labs <-
  vc_ecoc_doms_metab_mat %>%
  rownames_to_column(var = "sampleID") %>%
  left_join(vc_ecoc) %>%
  pull(domination)

dim(vc_ecoc_doms_metab_mat) # 35 93 (means 93 compounds and 35 LT patients)
## [1] 35 93
## [1] 35
# Model metrics for all samples
vc_ecoc_doms_tot <- predict(
  tot_ecoc_doms_train_splsda_final,
  vc_ecoc_doms_metab_mat,
  dist = "all"
)

# Predicted class under mahalanobis.dist, taken from the column for the tuned
# number of components.
vc_ecoc_doms_tot_predict <-
  vc_ecoc_doms_tot$class$mahalanobis.dist[, tot_ecoc_doms_train_ncomp]
vc_ecoc_doms_tot_union <- union(vc_ecoc_doms_tot_predict, vc_ecoc_doms_metab_labs)

# Confusion matrix: rows are predictions, columns are the true labels.
# Only `levels` is passed to factor(). A second positional argument next to a
# named `levels =` is matched to `labels`, so passing the union vector there
# would relabel the classes by order of first appearance in the data and
# could silently swap "Expansion" and "No Expansion".
vc_ecoc_doms_cm <- confusionMatrix(
  table(
    factor(vc_ecoc_doms_tot_predict, levels = c("Expansion", "No Expansion")),
    factor(vc_ecoc_doms_metab_labs, levels = c("Expansion", "No Expansion"))
  ),
  positive = "Expansion"
)
vc_ecoc_doms_cm
# Output recorded from an earlier run:
## Confusion Matrix and Statistics
##
##
## Expansion No Expansion
## Expansion 10 5
## No Expansion 2 18
##
## Accuracy : 0.8
## 95% CI : (0.6306, 0.9156)
## No Information Rate : 0.6571
## P-Value [Acc > NIR] : 0.05024
##
## Kappa : 0.5812
##
## Mcnemar's Test P-Value : 0.44969
##
## Sensitivity : 0.8333
## Specificity : 0.7826
## Pos Pred Value : 0.6667
## Neg Pred Value : 0.9000
## Prevalence : 0.3429
## Detection Rate : 0.2857
## Detection Prevalence : 0.4286
## Balanced Accuracy : 0.8080
##
## 'Positive' Class : Expansion
##
# Additional model measures
vc_ecoc_doms_epi <- epiR::epi.tests(
  table(vc_ecoc_doms_tot_predict, vc_ecoc_doms_metab_labs),
  conf.level = 0.95
)

# Display version of the 2x2 table: transpose so rows are the actual classes,
# then relabel rows (matched on "+" / "-") and the "Test +" / "Test -" columns.
vc_ecoc_doms_confusion_df <- vc_ecoc_doms_epi$tab %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column(var = "actual") %>%
  mutate(
    actual = case_when(
      grepl(actual, pattern = "+", fixed = TRUE) ~ "Actual\nExpansion",
      grepl(actual, pattern = "-", fixed = TRUE) ~ "Actual\nNo Expansion",
      TRUE ~ "Total"
    )
  ) %>%
  dplyr::rename(
    "Predicted\nExpansion" = "Test +",
    "Predicted\nNo Expansion" = "Test -"
  ) %>%
  column_to_rownames(var = "actual")
vc_ecoc_doms_confusion_df
## [1] 18
# Train the model to tune hyperparameters.
# Initial 5-component fit, used to decide how many components to keep.
tot_ebac_doms_train_splsda <- mixOmics::splsda(
  ebac_doms_metab_mat,
  ebac_doms_metab_labs,
  ncomp = 5
)

# Performance assessment: 5-fold cross-validation repeated 50 times.
set.seed(3456)
tot_ebac_doms_train_plsda_perf <- perf(
  tot_ebac_doms_train_splsda,
  validation = "Mfold",
  folds = 5,
  nrepeat = 50,
  auc = TRUE,
  progressBar = FALSE
)

# Plot the cross-validation performance (standard-deviation bars disabled).
plot(
  tot_ebac_doms_train_plsda_perf,
  col = color.mixo(5:7),
  sd = FALSE,
  auc = TRUE,
  legend.position = "horizontal"
)
# ncomp = 2 seems best for classification error rate and max.dist

# Number of optimal variables to select for each component
# Candidate numbers of variables (keepX) to test on each component.
tot_ebac_doms_train_keepX <- c(1:10, seq(20, 108, 10))

# Tune keepX with 5-fold cross-validation repeated 50 times, scored by
# balanced error rate (BER) using the max.dist prediction distance.
set.seed(123)
tot_ebac_doms_train_tune_splsda <- mixOmics::tune.splsda(
  ebac_doms_metab_mat,
  ebac_doms_metab_labs,
  ncomp = 3, # Choose 3 components (max) to be safe
  validation = "Mfold",
  folds = 5,
  dist = "max.dist",
  progressBar = FALSE,
  auc = TRUE,
  measure = "BER",
  test.keepX = tot_ebac_doms_train_keepX,
  nrepeat = 50
)
plot(tot_ebac_doms_train_tune_splsda, col = color.jet(3))

tot_ebac_doms_train_error <- tot_ebac_doms_train_tune_splsda$error.rate

# Optimal number of components based on t-tests on the error rate.
tot_ebac_doms_train_ncomp <- tot_ebac_doms_train_tune_splsda$choice.ncomp$ncomp
# Disabled manual override (its original note read "4 components are optimal
# via visual inspection", which does not match the value 2 -- confirm):
# tot_ebac_doms_train_ncomp = 2

# Optimal number of variables to select per component. At least two
# components are always kept, even when tuning selects a single one.
tot_ebac_doms_train_select_keepX <-
  tot_ebac_doms_train_tune_splsda$choice.keepX[
    seq_len(max(tot_ebac_doms_train_ncomp, 2))
  ]
tot_ebac_doms_train_select_keepX
## comp1 comp2
## 1 1

# Final Model
tot_ebac_doms_train_splsda_final <- mixOmics::splsda(
  ebac_doms_metab_mat,
  ebac_doms_metab_labs,
  ncomp = max(tot_ebac_doms_train_ncomp, 2),
  keepX = tot_ebac_doms_train_select_keepX
)

# Needed for downstream excel file for component loadings
tot_ebac_doms_train_splsda_final_copy <- tot_ebac_doms_train_splsda_final
# Per-sample Enterobacterales relative abundance (sum of `pctseqs` over rows
# whose Genus matches "Enterobacterales") and the resulting expansion label.
# NOTE(review): the filter is applied to the `Genus` column although
# "Enterobacterales" is an order-level name -- confirm that `Genus` carries
# this label in `vc_metaphlan_df2`.
vc_ebac <- vc_metaphlan_df2 %>%
  right_join(
    vc_first_samps %>%
      mutate(
        sampleID = recode(
          sampleID,
          `vc-011-02` = "vc-011-01",
          `vc-012-02` = "vc-012-01"
        )
      )
  ) %>%
  group_by(sampleID) %>%
  filter(grepl(x = Genus, pattern = "Enterobacterales", ignore.case = TRUE)) %>%
  count(sampleID, wt = pctseqs, name = "enterobacterales_rel_abundance") %>%
  # Bring back first samples that had no matching rows at all.
  full_join(
    vc_first_samps %>%
      ungroup() %>%
      mutate(
        sampleID = recode(
          sampleID,
          `vc-011-02` = "vc-011-01",
          `vc-012-02` = "vc-012-01"
        )
      ) %>%
      select(sampleID)
  ) %>%
  mutate(
    # Samples without any matching rows get an abundance of 0.
    enterobacterales_rel_abundance = ifelse(
      is.na(enterobacterales_rel_abundance),
      0,
      enterobacterales_rel_abundance
    ),
    # The first entry of `optimal_cutpoint_rel$optimal_cutpoint` is the
    # threshold used for this label.
    domination = case_when(
      enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1] ~ "Expansion",
      TRUE ~ "No Expansion"
    )
  ) %>%
  drop_na(domination) %>%
  ungroup() %>%
  distinct(sampleID, domination)
# Sample-by-compound matrix, restricted to the columns of the training matrix
# `ebac_doms_metab_mat`.
vc_ebac_doms_metab_mat <-
  vc_metab_qual %>%
  ungroup() %>%
  select(patientID, sampleID, compound, mvalue) %>%
  inner_join(
    vc_first_samps %>%
      mutate(
        sampleID = recode(
          sampleID,
          `vc-011-02` = "vc-011-01",
          `vc-012-02` = "vc-012-01"
        )
      )
  ) %>%
  mutate(compound = str_to_title(compound)) %>%
  filter(compound %in% heatmap_cmpds$compound | is.na(compound)) %>%
  ungroup() %>%
  left_join(vc_ebac) %>%
  group_by(sampleID, compound, domination) %>%
  # Averaging step is disabled:
  # summarise(mvalue = mean(mvalue, na.rm = TRUE)) %>%
  ungroup() %>%
  # Normalise NaN to NA in every column.
  mutate_all(~ replace(., is.nan(.), NA)) %>%
  select(sampleID, compound, mvalue, domination) %>%
  drop_na(sampleID) %>%
  pivot_wider(names_from = "compound", values_from = "mvalue") %>%
  # Missing values are replaced with 0 in this matrix.
  # NOTE(review): the same step is commented out in the other validation
  # matrices in this file, and it turns the all-NA row filters below into
  # no-ops -- confirm the zero-fill is intended here.
  mutate_all(~ replace(., is.na(.), 0)) %>%
  filter(sampleID != "") %>%
  filter_all(any_vars(!is.na(.))) %>%
  select(-domination) %>%
  column_to_rownames(var = "sampleID") %>%
  # Select the training-matrix columns; the training name
  # "Omega-Muricholic Acid" is mapped to this cohort's column name first.
  select(
    names(ebac_doms_metab_mat) %>%
      as.data.frame() %>%
      dplyr::rename(compound = ".") %>%
      mutate(
        compound = recode(
          compound,
          `Omega-Muricholic Acid` = "3-Epicholic Acid Or Omega-Muricholic Acid"
        )
      ) %>%
      pull(compound)
  ) %>%
  filter_all(any_vars(!is.na(.)))
# Expansion label for each row of `vc_ebac_doms_metab_mat`, in row order.
vc_ebac_doms_metab_labs <-
  vc_ebac_doms_metab_mat %>%
  rownames_to_column(var = "sampleID") %>%
  left_join(vc_ebac) %>%
  pull(domination)

dim(vc_ebac_doms_metab_mat) # 35 93 (means 93 compounds and 35 LT patients)
## [1] 35 93
## [1] 35
# Model metrics for all samples
vc_ebac_doms_tot <- predict(
  tot_ebac_doms_train_splsda_final,
  vc_ebac_doms_metab_mat,
  dist = "all"
)

# Predicted class under mahalanobis.dist, taken from the column for the tuned
# number of components.
vc_ebac_doms_tot_predict <-
  vc_ebac_doms_tot$class$mahalanobis.dist[, tot_ebac_doms_train_ncomp]
vc_ebac_doms_tot_union <- union(vc_ebac_doms_tot_predict, vc_ebac_doms_metab_labs)

# Confusion matrix: rows are predictions, columns are the true labels.
# Only `levels` is passed to factor(). A second positional argument next to a
# named `levels =` is matched to `labels`, so passing the union vector there
# would relabel the classes by order of first appearance in the data and
# could silently swap "Expansion" and "No Expansion".
vc_ebac_doms_cm <- confusionMatrix(
  table(
    factor(vc_ebac_doms_tot_predict, levels = c("No Expansion", "Expansion")),
    factor(vc_ebac_doms_metab_labs, levels = c("No Expansion", "Expansion"))
  ),
  positive = "Expansion"
)
vc_ebac_doms_cm
# Output recorded from an earlier run:
## Confusion Matrix and Statistics
##
##
## No Expansion Expansion
## No Expansion 27 6
## Expansion 1 1
##
## Accuracy : 0.8
## 95% CI : (0.6306, 0.9156)
## No Information Rate : 0.8
## P-Value [Acc > NIR] : 0.5993
##
## Kappa : 0.1463
##
## Mcnemar's Test P-Value : 0.1306
##
## Sensitivity : 0.14286
## Specificity : 0.96429
## Pos Pred Value : 0.50000
## Neg Pred Value : 0.81818
## Prevalence : 0.20000
## Detection Rate : 0.02857
## Detection Prevalence : 0.05714
## Balanced Accuracy : 0.55357
##
## 'Positive' Class : Expansion
##
# Additional model measures
vc_ebac_doms_epi <- epiR::epi.tests(
  table(vc_ebac_doms_tot_predict, vc_ebac_doms_metab_labs),
  conf.level = 0.95
)

# Display version of the 2x2 table: transpose so rows are the actual classes,
# then relabel rows (matched on "+" / "-") and the "Test +" / "Test -" columns.
vc_ebac_doms_confusion_df <- vc_ebac_doms_epi$tab %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column(var = "actual") %>%
  mutate(
    actual = case_when(
      grepl(actual, pattern = "+", fixed = TRUE) ~ "Actual\nExpansion",
      grepl(actual, pattern = "-", fixed = TRUE) ~ "Actual\nNo Expansion",
      TRUE ~ "Total"
    )
  ) %>%
  dplyr::rename(
    "Predicted\nExpansion" = "Test +",
    "Predicted\nNo Expansion" = "Test -"
  ) %>%
  column_to_rownames(var = "actual")
vc_ebac_doms_confusion_df
## [1] 4.5
# Train the model to tune hyperparameters.
# Initial 5-component fit, used to decide how many components to keep.
tot_infx_train_splsda <- mixOmics::splsda(
  infx_metab_mat,
  infx_metab_labs,
  ncomp = 5
)

# Performance assessment: 5-fold cross-validation repeated 50 times
# (nrepeat = 50 below).
set.seed(1234)
tot_infx_train_plsda_perf <- perf(
  tot_infx_train_splsda,
  validation = "Mfold",
  folds = 5,
  nrepeat = 50,
  auc = TRUE,
  progressBar = FALSE
)

# Plot the cross-validation performance (standard-deviation bars disabled).
plot(
  tot_infx_train_plsda_perf,
  col = color.mixo(5:7),
  sd = FALSE,
  auc = TRUE,
  legend.position = "horizontal"
)
# ncomp = 1 is best for classification error rate and max.dist

# Number of optimal variables to select for each component
# Candidate numbers of variables (keepX) to test on each component.
tot_infx_train_keepX <- c(1:10, seq(20, 108, 10))

# Tune keepX with 5-fold cross-validation repeated 100 times, scored by
# balanced error rate (BER) using the max.dist prediction distance.
set.seed(5678)
tot_infx_train_tune_splsda <- mixOmics::tune.splsda(
  infx_metab_mat,
  infx_metab_labs,
  ncomp = 3, # Choose 3 components (max) to be safe
  validation = "Mfold",
  folds = 5,
  dist = "max.dist",
  progressBar = FALSE,
  auc = TRUE,
  measure = "BER",
  test.keepX = tot_infx_train_keepX,
  nrepeat = 100
)
plot(tot_infx_train_tune_splsda, col = color.jet(3))

tot_infx_train_error <- tot_infx_train_tune_splsda$error.rate

# Optimal number of components based on t-tests on the error rate.
tot_infx_train_ncomp <- tot_infx_train_tune_splsda$choice.ncomp$ncomp
tot_infx_train_ncomp # 3 components are optimal
## [1] 3

# Optimal number of variables to select per component. At least two
# components are always kept, even when tuning selects a single one.
tot_infx_train_select_keepX <-
  tot_infx_train_tune_splsda$choice.keepX[
    seq_len(max(tot_infx_train_ncomp, 2))
  ]
tot_infx_train_select_keepX
## comp1 comp2 comp3
## 90 90 70

# Final Model
tot_infx_train_splsda_final <- mixOmics::splsda(
  infx_metab_mat,
  infx_metab_labs,
  ncomp = max(tot_infx_train_ncomp, 2),
  keepX = tot_infx_train_select_keepX
)

# Needed for downstream excel file for component loadings
tot_infx_train_splsda_final_copy <- tot_infx_train_splsda_final
# One sample per patient with a bacterial infection: within each
# patient/eday, keep the row(s) whose `infx_stool` is closest to 0, then take
# the first remaining row per patient. The result stays grouped by patientID.
vc_infx <- vc_peri_criteria_all %>%
  filter(bact_infection_present == "Yes") %>%
  group_by(patientID, eday, infx_stool) %>%
  dplyr::slice(1) %>%
  group_by(patientID, eday) %>%
  filter(abs(infx_stool - 0) == min(abs(infx_stool - 0))) %>%
  group_by(patientID) %>%
  dplyr::slice(1) %>%
  select(patientID, sampleID, bact_infection_present)
# Disabled alternative ending:
# ungroup() %>%
# select(patientID, sampleID, bact_infection_present)

# Patients not present in `vc_infx`, restricted to their first samples.
vc_no_infx <- vc_peri_criteria_all %>%
  filter(patientID %!in% vc_infx$patientID) %>%
  ungroup() %>%
  select(patientID, sampleID, bact_infection_present) %>%
  distinct() %>%
  right_join(
    vc_first_samps %>%
      ungroup() %>%
      mutate(
        sampleID = recode(
          sampleID,
          `vc-011-02` = "vc-011-01",
          `vc-012-02` = "vc-012-01"
        )
      ) %>%
      select(patientID, sampleID) %>%
      filter(
        patientID %!in% vc_infx$patientID | sampleID %!in% vc_infx$sampleID
      )
  ) %>%
  ungroup() %>%
  drop_na() %>%
  distinct(patientID, sampleID, bact_infection_present)

# Infection and no-infection samples stacked into one table.
vc_tot_infx <- bind_rows(vc_infx, vc_no_infx)
# Sample-by-compound matrix for the samples in `vc_tot_infx`, restricted to
# the columns of the training matrix `infx_metab_mat`.
vc_infx_metab_mat <-
  vc_metab_qual %>%
  ungroup() %>%
  select(patientID, sampleID, compound, mvalue) %>%
  left_join(
    vc_first_samps %>%
      mutate(
        sampleID = recode(
          sampleID,
          `vc-011-02` = "vc-011-01",
          `vc-012-02` = "vc-012-01"
        )
      )
  ) %>%
  mutate(compound = str_to_title(compound)) %>%
  filter(compound %in% heatmap_cmpds$compound | is.na(compound)) %>%
  ungroup() %>%
  right_join(vc_tot_infx) %>%
  # Average repeated measurements per sample/compound/infection status.
  group_by(sampleID, compound, bact_infection_present) %>%
  summarise(mvalue = mean(mvalue, na.rm = TRUE)) %>%
  ungroup() %>%
  # Normalise NaN to NA in every column.
  mutate_all(~ replace(., is.nan(.), NA)) %>%
  select(sampleID, compound, mvalue, bact_infection_present) %>%
  drop_na(sampleID) %>%
  pivot_wider(names_from = "compound", values_from = "mvalue") %>%
  filter(sampleID != "") %>%
  # The relabelled column is dropped again by the next step.
  mutate(
    bact_infection_present = ifelse(
      grepl(x = bact_infection_present, pattern = "No"),
      "No Infection",
      "Infection"
    )
  ) %>%
  select(-bact_infection_present) %>%
  column_to_rownames(var = "sampleID") %>%
  # Same columns, in the same order, as the training matrix.
  select(
    names(infx_metab_mat) %>%
      as.data.frame() %>%
      dplyr::rename(compound = ".") %>%
      pull(compound)
  ) %>%
  # Drop rows in which every compound is missing.
  filter_all(any_vars(!is.na(.)))
# Infection label for each row of `vc_infx_metab_mat`, in row order. The
# status comes from the first `vc_peri_criteria_all` row per sample; values
# matching "No" become "No Infection", everything else "Infection".
vc_infx_metab_labs <-
  vc_infx_metab_mat %>%
  rownames_to_column(var = "sampleID") %>%
  left_join(
    vc_peri_criteria_all %>%
      select(sampleID, bact_infection_present) %>%
      group_by(sampleID) %>%
      dplyr::slice(1)
  ) %>%
  mutate(
    bact_infection_present = ifelse(
      grepl(x = bact_infection_present, pattern = "No"),
      "No Infection",
      "Infection"
    )
  ) %>%
  pull(bact_infection_present)

dim(vc_infx_metab_mat) # 35 93 (means 93 compounds and 35 LT patients/infections)
## [1] 35 93
## [1] 35
# Model metrics for all samples
vc_infx_tot <- predict(
  tot_infx_train_splsda_final,
  vc_infx_metab_mat,
  dist = "all"
)

# Predicted class under max.dist, taken from the column matching the number of
# components the final model was fitted with (at least two).
vc_infx_tot_predict <-
  vc_infx_tot$class$max.dist[, max(tot_infx_train_ncomp, 2)]
vc_infx_tot_union <- union(vc_infx_tot_predict, vc_infx_metab_labs)

# Confusion matrix: rows are predictions, columns are the true labels.
# Only `levels` is passed to factor(). A second positional argument next to a
# named `levels =` is matched to `labels`, so passing the union vector there
# would relabel the classes by order of first appearance in the data and
# could silently swap "Infection" and "No Infection".
vc_infx_cm <- confusionMatrix(
  table(
    factor(vc_infx_tot_predict, levels = c("No Infection", "Infection")),
    factor(vc_infx_metab_labs, levels = c("No Infection", "Infection"))
  ),
  positive = "Infection"
)
vc_infx_cm
# Output recorded from an earlier run:
## Confusion Matrix and Statistics
##
##
## No Infection Infection
## No Infection 26 4
## Infection 2 3
##
## Accuracy : 0.8286
## 95% CI : (0.6635, 0.9344)
## No Information Rate : 0.8
## P-Value [Acc > NIR] : 0.4328
##
## Kappa : 0.4
##
## Mcnemar's Test P-Value : 0.6831
##
## Sensitivity : 0.42857
## Specificity : 0.92857
## Pos Pred Value : 0.60000
## Neg Pred Value : 0.86667
## Prevalence : 0.20000
## Detection Rate : 0.08571
## Detection Prevalence : 0.14286
## Balanced Accuracy : 0.67857
##
## 'Positive' Class : Infection
##
# Disabled alternative summary of the confusion-matrix statistics:
# as.data.frame(t(data.frame(cbind(t(vc_infx_cm$byClass),t(vc_infx_cm$overall)))))

# Additional model measures
vc_infx_epi <- epiR::epi.tests(
  table(vc_infx_tot_predict, vc_infx_metab_labs),
  conf.level = 0.95
)

# Display version of the 2x2 table: transpose so rows are the actual classes,
# then relabel rows (matched on "+" / "-") and the "Test +" / "Test -" columns.
vc_infx_confusion_df <- vc_infx_epi$tab %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column(var = "actual") %>%
  mutate(
    actual = case_when(
      grepl(actual, pattern = "+", fixed = TRUE) ~ "Actual\nInfection",
      grepl(actual, pattern = "-", fixed = TRUE) ~ "Actual\nNo Infection",
      TRUE ~ "Total"
    )
  ) %>%
  dplyr::rename(
    "Predicted\nInfection" = "Test +",
    "Predicted\nNo Infection" = "Test -"
  ) %>%
  column_to_rownames(var = "actual")
vc_infx_confusion_df
## [1] 9.75
# Internal-validation metrics, one row per model/class.
# Sources, as indexed below:
#   * `*_cm$overall[1]`, `[3]`, `[4]` feed acc, acc_lower, acc_upper.
#   * `diversity_mc_table` rows 8, 1 and 2 feed acc, sens and spec.
#   * `*_epi$detail` columns 2/3/4 feed estimate/lower/upper; rows 3, 4 and 6
#     feed sens, spec and odds.
# NOTE(review): these are positional lookups -- confirm they still match the
# layouts produced by the installed caret / biostatUtil / epiR versions.
og_metrics <- data.frame(
  model = c(
    "Diversity Overall: Internal Validation",
    "Diversity Low: Internal Validation",
    "Diversity Medium: Internal Validation",
    "Diversity High: Internal Validation",
    "Enterococcus Expansion: Internal Validation",
    "Enterobacterales Expansion: Internal Validation",
    "Infection: Internal Validation"
  ),
  # Accuracy
  acc = c(
    diversity_cm$overall[1],
    diversity_mc_table$Low_Avg[8],
    diversity_mc_table$Med_Avg[8],
    diversity_mc_table$High_Avg[8],
    ecoc_doms_cm$overall[1],
    ebac_doms_cm$overall[1],
    infx_cm$overall[1]
  ),
  acc_lower = c(
    diversity_cm$overall[3],
    diversity_mc_table$Low_Lower[8],
    diversity_mc_table$Med_Lower[8],
    diversity_mc_table$High_Lower[8],
    ecoc_doms_cm$overall[3],
    ebac_doms_cm$overall[3],
    infx_cm$overall[3]
  ),
  acc_upper = c(
    diversity_cm$overall[4],
    diversity_mc_table$Low_Upper[8],
    diversity_mc_table$Med_Upper[8],
    diversity_mc_table$High_Upper[8],
    ecoc_doms_cm$overall[4],
    ebac_doms_cm$overall[4],
    infx_cm$overall[4]
  ),
  # Sensitivity (no overall value for the multiclass model)
  sens = c(
    NA,
    diversity_mc_table$Low_Avg[1],
    diversity_mc_table$Med_Avg[1],
    diversity_mc_table$High_Avg[1],
    ecoc_doms_epi$detail[2][3, ],
    ebac_doms_epi$detail[2][3, ],
    infx_epi$detail[2][3, ]
  ),
  sens_lower = c(
    NA,
    diversity_mc_table$Low_Lower[1],
    diversity_mc_table$Med_Lower[1],
    diversity_mc_table$High_Lower[1],
    ecoc_doms_epi$detail[3][3, ],
    ebac_doms_epi$detail[3][3, ],
    infx_epi$detail[3][3, ]
  ),
  sens_upper = c(
    NA,
    diversity_mc_table$Low_Upper[1],
    diversity_mc_table$Med_Upper[1],
    diversity_mc_table$High_Upper[1],
    ecoc_doms_epi$detail[4][3, ],
    ebac_doms_epi$detail[4][3, ],
    infx_epi$detail[4][3, ]
  ),
  # Specificity
  spec = c(
    NA,
    diversity_mc_table$Low_Avg[2],
    diversity_mc_table$Med_Avg[2],
    diversity_mc_table$High_Avg[2],
    ecoc_doms_epi$detail[2][4, ],
    ebac_doms_epi$detail[2][4, ],
    infx_epi$detail[2][4, ]
  ),
  spec_lower = c(
    NA,
    diversity_mc_table$Low_Lower[2],
    diversity_mc_table$Med_Lower[2],
    diversity_mc_table$High_Lower[2],
    ecoc_doms_epi$detail[3][4, ],
    ebac_doms_epi$detail[3][4, ],
    infx_epi$detail[3][4, ]
  ),
  spec_upper = c(
    NA,
    diversity_mc_table$Low_Upper[2],
    diversity_mc_table$Med_Upper[2],
    diversity_mc_table$High_Upper[2],
    ecoc_doms_epi$detail[4][4, ],
    ebac_doms_epi$detail[4][4, ],
    infx_epi$detail[4][4, ]
  ),
  # Odds ratio (per-class DOR for diversity; no interval available there)
  odds = c(
    NA,
    diversity_epi$DOR[1],
    diversity_epi$DOR[2],
    diversity_epi$DOR[3],
    ecoc_doms_epi$detail[2][6, ],
    ebac_doms_epi$detail[2][6, ],
    infx_epi$detail[2][6, ]
  ),
  odds_lower = c(
    NA,
    NA,
    NA,
    NA,
    ecoc_doms_epi$detail[3][6, ],
    ebac_doms_epi$detail[3][6, ],
    infx_epi$detail[3][6, ]
  ),
  odds_upper = c(
    NA,
    NA,
    NA,
    NA,
    ecoc_doms_epi$detail[4][6, ],
    ebac_doms_epi$detail[4][6, ],
    infx_epi$detail[4][6, ]
  )
)
# Disabled reshaping step:
# %>%
# pivot_longer(!model, names_to = "metric", values_to = "metric_value") %>%
# drop_na()
# External-validation metrics, one row per model/class.
# Sources, as indexed below:
#   * `vc_*_cm$overall[1]`, `[3]`, `[4]` feed acc, acc_lower, acc_upper.
#   * `vc_diversity_mc_table` rows 8, 1 and 2 feed acc, sens and spec.
#   * `vc_*_epi$detail` columns 2/3/4 feed estimate/lower/upper; rows 3, 4 and
#     6 feed sens, spec and odds.
# NOTE(review): these are positional lookups -- confirm they still match the
# layouts produced by the installed caret / biostatUtil / epiR versions.
vc_metrics <- data.frame(
  model = c(
    "Diversity Overall: External Validation",
    "Diversity Low: External Validation",
    "Diversity Medium: External Validation",
    "Diversity High: External Validation",
    "Enterococcus Expansion: External Validation",
    "Enterobacterales Expansion: External Validation",
    "Infection: External Validation"
  ),
  # Accuracy
  acc = c(
    vc_diversity_cm$overall[1],
    vc_diversity_mc_table$Low_Avg[8],
    vc_diversity_mc_table$Med_Avg[8],
    vc_diversity_mc_table$High_Avg[8],
    vc_ecoc_doms_cm$overall[1],
    vc_ebac_doms_cm$overall[1],
    vc_infx_cm$overall[1]
  ),
  acc_lower = c(
    vc_diversity_cm$overall[3],
    vc_diversity_mc_table$Low_Lower[8],
    vc_diversity_mc_table$Med_Lower[8],
    vc_diversity_mc_table$High_Lower[8],
    vc_ecoc_doms_cm$overall[3],
    vc_ebac_doms_cm$overall[3],
    vc_infx_cm$overall[3]
  ),
  acc_upper = c(
    vc_diversity_cm$overall[4],
    vc_diversity_mc_table$Low_Upper[8],
    vc_diversity_mc_table$Med_Upper[8],
    vc_diversity_mc_table$High_Upper[8],
    vc_ecoc_doms_cm$overall[4],
    vc_ebac_doms_cm$overall[4],
    vc_infx_cm$overall[4]
  ),
  # Sensitivity (no overall value for the multiclass model)
  sens = c(
    NA,
    vc_diversity_mc_table$Low_Avg[1],
    vc_diversity_mc_table$Med_Avg[1],
    vc_diversity_mc_table$High_Avg[1],
    vc_ecoc_doms_epi$detail[2][3, ],
    vc_ebac_doms_epi$detail[2][3, ],
    vc_infx_epi$detail[2][3, ]
  ),
  sens_lower = c(
    NA,
    vc_diversity_mc_table$Low_Lower[1],
    vc_diversity_mc_table$Med_Lower[1],
    vc_diversity_mc_table$High_Lower[1],
    vc_ecoc_doms_epi$detail[3][3, ],
    vc_ebac_doms_epi$detail[3][3, ],
    vc_infx_epi$detail[3][3, ]
  ),
  sens_upper = c(
    NA,
    vc_diversity_mc_table$Low_Upper[1],
    vc_diversity_mc_table$Med_Upper[1],
    vc_diversity_mc_table$High_Upper[1],
    vc_ecoc_doms_epi$detail[4][3, ],
    vc_ebac_doms_epi$detail[4][3, ],
    vc_infx_epi$detail[4][3, ]
  ),
  # Specificity
  spec = c(
    NA,
    vc_diversity_mc_table$Low_Avg[2],
    vc_diversity_mc_table$Med_Avg[2],
    vc_diversity_mc_table$High_Avg[2],
    vc_ecoc_doms_epi$detail[2][4, ],
    vc_ebac_doms_epi$detail[2][4, ],
    vc_infx_epi$detail[2][4, ]
  ),
  spec_lower = c(
    NA,
    vc_diversity_mc_table$Low_Lower[2],
    vc_diversity_mc_table$Med_Lower[2],
    vc_diversity_mc_table$High_Lower[2],
    vc_ecoc_doms_epi$detail[3][4, ],
    vc_ebac_doms_epi$detail[3][4, ],
    vc_infx_epi$detail[3][4, ]
  ),
  spec_upper = c(
    NA,
    vc_diversity_mc_table$Low_Upper[2],
    vc_diversity_mc_table$Med_Upper[2],
    vc_diversity_mc_table$High_Upper[2],
    vc_ecoc_doms_epi$detail[4][4, ],
    vc_ebac_doms_epi$detail[4][4, ],
    vc_infx_epi$detail[4][4, ]
  ),
  # Odds ratio (per-class DOR for diversity; no interval available there)
  odds = c(
    NA,
    vc_diversity_epi$DOR[1],
    vc_diversity_epi$DOR[2],
    vc_diversity_epi$DOR[3],
    vc_ecoc_doms_epi$detail[2][6, ],
    vc_ebac_doms_epi$detail[2][6, ],
    vc_infx_epi$detail[2][6, ]
  ),
  odds_lower = c(
    NA,
    NA,
    NA,
    NA,
    vc_ecoc_doms_epi$detail[3][6, ],
    vc_ebac_doms_epi$detail[3][6, ],
    vc_infx_epi$detail[3][6, ]
  ),
  odds_upper = c(
    NA,
    NA,
    NA,
    NA,
    vc_ecoc_doms_epi$detail[4][6, ],
    vc_ebac_doms_epi$detail[4][6, ],
    vc_infx_epi$detail[4][6, ]
  )
)
# Disabled reshaping step:
# %>%
# pivot_longer(!c(model, acc_lower, acc_upper, sens_lower, sens_upper, spec_lower, spec_upper, odds_lower, odds_upper), names_to = "metric", values_to = "metric_value") %>%
# filter(is.nan(metric_value)|!is.na(metric_value))
# Long-format table of point estimates with their matching confidence bounds
# for accuracy, sensitivity and specificity (odds ratios are excluded), plus
# the display factors used for plotting.
metrics_total <- bind_rows(og_metrics, vc_metrics) %>%
  # Point estimates to long form; the bound columns stay wide for now.
  pivot_longer(
    !c(
      model,
      acc_lower, acc_upper,
      sens_lower, sens_upper,
      spec_lower, spec_upper,
      odds_lower, odds_upper
    ),
    names_to = "metric",
    values_to = "metric_point"
  ) %>%
  drop_na(metric_point) %>%
  # Lower and upper bounds to long form; this crosses every metric with every
  # bound, so the filter below keeps only the matching triples.
  pivot_longer(
    ends_with("lower"),
    names_to = "lower_ci",
    values_to = "lower_ci_value"
  ) %>%
  pivot_longer(
    ends_with("upper"),
    names_to = "upper_ci",
    values_to = "upper_ci_value"
  ) %>%
  filter(
    (metric == "acc" & lower_ci == "acc_lower" & upper_ci == "acc_upper") |
      (metric == "sens" & lower_ci == "sens_lower" & upper_ci == "sens_upper") |
      (metric == "spec" & lower_ci == "spec_lower" & upper_ci == "spec_upper")
    # Disabled: metric == "odds" & lower_ci == "odds_lower" & upper_ci == "odds_upper"
  ) %>%
  mutate(
    cohort = ifelse(
      grepl(model, pattern = "Internal"),
      "Internal Validation",
      "External Validation"
    ),
    cohort = factor(
      cohort,
      levels = c("External Validation", "Internal Validation")
    ),
    # Strip the cohort suffix and put each word of the model name on its own
    # line for the facet labels.
    model_simple = gsub(
      model,
      pattern = ": External Validation|: Internal Validation",
      replacement = ""
    ),
    model_simple = gsub(model_simple, pattern = "\\s", replacement = "\n"),
    model_simple = factor(
      model_simple,
      levels = c(
        "Diversity\nLow",
        "Diversity\nMedium",
        "Diversity\nHigh",
        "Diversity\nOverall",
        "Enterococcus\nExpansion",
        "Enterobacterales\nExpansion",
        "Infection"
      )
    ),
    metric = recode(
      metric,
      acc = "Accuracy",
      sens = "Sensitivity",
      spec = "Specificity"
      # Disabled: odds = "Odds Ratio"
    ),
    metric = factor(
      metric,
      levels = c(
        "Accuracy",
        "Sensitivity",
        "Specificity"
        # Disabled: "Odds Ratio"
      )
    )
  )
# Disabled: select(-lower_ci, -upper_ci)
# Point-and-interval plot of every model metric: one facet row per model,
# one facet column per metric, one point with error bar per cohort.
# eb(), et() and er() are helpers defined elsewhere in this file
# (presumably aliases for element_blank/element_text/element_rect).
gg_metrics_total <- ggplot(
  metrics_total %>% arrange(desc(cohort)),
  aes(x = metric_point, y = cohort, color = cohort)
) +
  geom_errorbar(aes(xmin = lower_ci_value, xmax = upper_ci_value), lwd = 1) +
  geom_point(size = 3) +
  facet_grid(model_simple ~ metric, switch = "y") +
  scale_x_continuous(labels = scales::percent) +
  scale_color_manual(values = c("#8EDFE8", "#333F48FF")) +
  guides(
    color = guide_legend(title = "Model Cohort", reverse = TRUE, ncol = 1)
  ) +
  # xlab("\nValue") +
  ylab("Model\n") +
  theme_classic2() +
  theme(
    # Panels
    panel.grid.minor = eb(),
    panel.spacing.x = unit(2.5, "mm"),
    panel.spacing.y = unit(2, "mm"),
    panel.border = er(colour = "black", fill = NA, linewidth = 1),
    # Axes: the y axis only separates cohorts, which the legend already names
    axis.text.x = et(color = "black", size = 16, angle = 45,
                     hjust = 1, vjust = 1),
    axis.text.y = eb(),
    axis.ticks.y = eb(),
    axis.title.x = eb(),
    axis.title.y = et(color = "black", size = 16),
    # Facet strips
    strip.text.y.left = et(color = "black", size = 16, angle = 0),
    strip.background.y = er(fill = "white"),
    strip.text.x = et(color = "black", size = 16),
    strip.background.x = er(fill = "white"),
    # Legend
    legend.title = et(color = "black", size = 16),
    legend.text = et(color = "black", size = 14),
    legend.position = "right"
  )

gg_metrics_total

# Take the absolute value of the loadings to make the diversity group
# loadings plot narrower
# NOTE: this overwrites the X loadings stored inside the model object itself.
tot_diversity_splsda_final$loadings$X <- abs(tot_diversity_splsda_final$loadings$X)

# Write the component 1 and 2 loadings plots to a PDF. The device is opened
# before the guarded section and closed in `finally`, so an error inside
# plotLoadings() can no longer leave the PDF device open and capture every
# later plot.
pdf(file = "./Results/Retrained_Diversity_sPLDSA_Loadings.pdf",
  height = 8, width = 11)
invisible(tryCatch(
  {
    par(mfrow = c(1, 3))
    # Component 1
    plotLoadings(tot_diversity_splsda_final, contrib = "max",
      method = "mean", comp = 1, legend = FALSE,
      legend.col = c("#EDE342", "#F69A97", "#FF51EB"),
      size.name = 1.1, size.title = rel(1), ndisplay = 50)
    # Component 2
    plotLoadings(tot_diversity_splsda_final, contrib = "max",
      method = "mean", comp = 2,
      legend.col = c("#EDE342", "#F69A97", "#FF51EB"),
      size.name = 1.1, size.title = rel(1), ndisplay = 50)
  },
  finally = dev.off()
))

# Same two plots on the active device, with smaller variable labels
par(mfrow = c(1, 3))
# Component 1
plotLoadings(tot_diversity_splsda_final, contrib = "max", method = "mean",
  comp = 1, legend = FALSE,
  legend.col = c("#EDE342", "#F69A97", "#FF51EB"),
  size.name = 0.6, size.title = rel(1), ndisplay = 50)
# Component 2
plotLoadings(tot_diversity_splsda_final, contrib = "max", method = "mean",
  comp = 2, legend.col = c("#EDE342", "#F69A97", "#FF51EB"),
  size.name = 0.6, size.title = rel(1), ndisplay = 50)

# Take the absolute value of the loadings to make the Enterococcus expansion
# loadings plot narrower
# NOTE: this overwrites the X loadings stored inside the model object itself.
tot_ecoc_doms_train_splsda_final$loadings$X <- abs(tot_ecoc_doms_train_splsda_final$loadings$X)

# Write the component 1 and 2 loadings plots to a PDF. The device is opened
# before the guarded section and closed in `finally`, so an error inside
# plotLoadings() can no longer leave the PDF device open and capture every
# later plot.
pdf(file = "./Results/Retrained_Enterococcus_Expansion_sPLDSA_Loadings.pdf",
  height = 8, width = 11)
invisible(tryCatch(
  {
    par(mfrow = c(1, 3))
    # Component 1
    plotLoadings(tot_ecoc_doms_train_splsda_final, contrib = "max",
      method = "mean", comp = 1, legend = FALSE,
      legend.col = c("#0C7A3A", "black"),
      size.name = 1.1, size.title = rel(1), ndisplay = 50)
    # Component 2
    plotLoadings(tot_ecoc_doms_train_splsda_final, contrib = "max",
      method = "mean", comp = 2,
      legend.col = c("#0C7A3A", "black"),
      size.name = 1.1, size.title = rel(1), ndisplay = 50)
  },
  finally = dev.off()
))

# Same two plots on the active device.
# NOTE(review): unlike the other three loadings sections, this one does not
# call par(mfrow = c(1, 3)) first and keeps size.name = 1.1 instead of 0.6 --
# confirm whether that difference is intentional.
# Component 1
plotLoadings(tot_ecoc_doms_train_splsda_final, contrib = "max",
  method = "mean", comp = 1, legend = FALSE,
  legend.col = c("#0C7A3A", "black"),
  size.name = 1.1, size.title = rel(1), ndisplay = 50)
# Component 2
plotLoadings(tot_ecoc_doms_train_splsda_final, contrib = "max",
  method = "mean", comp = 2, legend.col = c("#0C7A3A", "black"),
  size.name = 1.1, size.title = rel(1), ndisplay = 50)

# Take the absolute value of the loadings to make the Enterobacterales
# expansion loadings plot narrower
# NOTE: this overwrites the X loadings stored inside the model object itself.
tot_ebac_doms_train_splsda_final$loadings$X <- abs(tot_ebac_doms_train_splsda_final$loadings$X)

# Write the component 1 and 2 loadings plots to a PDF. The device is opened
# before the guarded section and closed in `finally`, so an error inside
# plotLoadings() can no longer leave the PDF device open and capture every
# later plot.
pdf(file = "./Results/Retrained_Enterobacterales_Expansion_sPLDSA_Loadings.pdf",
  height = 8, width = 11)
invisible(tryCatch(
  {
    par(mfrow = c(1, 3))
    # Component 1
    plotLoadings(tot_ebac_doms_train_splsda_final, contrib = "max",
      method = "mean", comp = 1, legend = FALSE,
      legend.col = c("#FF0000", "black"),
      size.name = 1.1, size.title = rel(1), ndisplay = 50)
    # Component 2
    plotLoadings(tot_ebac_doms_train_splsda_final, contrib = "max",
      method = "mean", comp = 2,
      legend.col = c("#FF0000", "black"),
      size.name = 1.1, size.title = rel(1), ndisplay = 50)
  },
  finally = dev.off()
))

# Same two plots on the active device, with smaller variable labels
par(mfrow = c(1, 3))
# Component 1
plotLoadings(tot_ebac_doms_train_splsda_final, contrib = "max",
  method = "mean", comp = 1, legend = FALSE,
  legend.col = c("#FF0000", "black"),
  size.name = 0.6, size.title = rel(1), ndisplay = 50)
# Component 2
plotLoadings(tot_ebac_doms_train_splsda_final, contrib = "max",
  method = "mean", comp = 2, legend.col = c("#FF0000", "black"),
  size.name = 0.6, size.title = rel(1), ndisplay = 50)

# Take the absolute value of the loadings to make the infection loadings
# plot narrower
# NOTE: this overwrites the X loadings stored inside the model object itself.
tot_infx_train_splsda_final$loadings$X <- abs(tot_infx_train_splsda_final$loadings$X)

# Write the component 1 and 2 loadings plots to a PDF. The device is opened
# before the guarded section and closed in `finally`, so an error inside
# plotLoadings() can no longer leave the PDF device open and capture every
# later plot.
pdf(file = "./Results/Retrained_Infection_sPLDSA_Loadings.pdf",
  height = 8, width = 11)
invisible(tryCatch(
  {
    par(mfrow = c(1, 3))
    # Component 1
    plotLoadings(tot_infx_train_splsda_final, contrib = "max",
      method = "mean", comp = 1, legend = FALSE,
      legend.col = c("goldenrod", "gray75"),
      size.name = 1.1, size.title = rel(1), ndisplay = 50)
    # Component 2
    plotLoadings(tot_infx_train_splsda_final, contrib = "max",
      method = "mean", comp = 2,
      legend.col = c("goldenrod", "gray75"),
      size.name = 1.1, size.title = rel(1), ndisplay = 50)
  },
  finally = dev.off()
))

# Same plots on the active device, with smaller variable labels
par(mfrow = c(1, 3))
# Component 1
plotLoadings(tot_infx_train_splsda_final, contrib = "max", method = "mean",
  comp = 1, legend = FALSE, legend.col = c("goldenrod", "gray75"),
  size.name = 0.6, size.title = rel(1), ndisplay = 50)
# Component 2
plotLoadings(tot_infx_train_splsda_final, contrib = "max", method = "mean",
  comp = 2, legend.col = c("goldenrod", "gray75"), size.name = 0.6,
  size.title = rel(1), ndisplay = 50)

# Capture the value returned by plotLoadings() for components 1 and 2 of each
# retrained model so the loadings can be exported below. Each statement now
# sits on its own line; previously they were fused together (e.g.
# `...ndisplay = 50)tot_diversity_loadings_comp1 <- ...`), which R cannot
# parse.
# The diversity captures use `tot_diversity_splsda_final_copy`, not the object
# whose loadings were replaced by their absolute values above.
tot_diversity_loadings_comp1 <- plotLoadings(tot_diversity_splsda_final_copy,
  contrib = "max", method = "mean", legend = FALSE, comp = 1,
  legend.col = c("#EDE342", "#F69A97", "#FF51EB"), size.name = 1.1,
  size.title = rel(1), ndisplay = 50)
tot_diversity_loadings_comp2 <- plotLoadings(tot_diversity_splsda_final_copy,
  contrib = "max", method = "mean", legend = FALSE, comp = 2,
  legend.col = c("#EDE342", "#F69A97", "#FF51EB"), size.name = 1.1,
  size.title = rel(1), ndisplay = 50)
tot_ecoc_loadings_comp1 <- plotLoadings(tot_ecoc_doms_train_splsda_final,
  contrib = "max", method = "mean", comp = 1,
  legend.col = c("#0C7A3A", "black"), size.name = 1.1,
  size.title = rel(1), ndisplay = 50)
tot_ecoc_loadings_comp2 <- plotLoadings(tot_ecoc_doms_train_splsda_final,
  contrib = "max", method = "mean", comp = 2,
  legend.col = c("#0C7A3A", "black"), size.name = 1.1,
  size.title = rel(1), ndisplay = 50)
tot_ebac_loadings_comp1 <- plotLoadings(tot_ebac_doms_train_splsda_final,
  contrib = "max", method = "mean", comp = 1, legend = FALSE,
  legend.col = c("#FF0000", "black"), size.name = 1.1,
  size.title = rel(1), ndisplay = 50)
tot_ebac_loadings_comp2 <- plotLoadings(tot_ebac_doms_train_splsda_final,
  contrib = "max", method = "mean", comp = 2,
  legend.col = c("#FF0000", "black"), size.name = 1.1,
  size.title = rel(1), ndisplay = 50)
tot_infx_loadings_comp1 <- plotLoadings(tot_infx_train_splsda_final,
  contrib = "max", method = "mean", comp = 1, legend = FALSE,
  legend.col = c("goldenrod", "gray75"), size.name = 1.1,
  size.title = rel(1), ndisplay = 50)
tot_infx_loadings_comp2 <- plotLoadings(tot_infx_train_splsda_final,
  contrib = "max", method = "mean", comp = 2,
  legend.col = c("goldenrod", "gray75"), size.name = 1.1,
  size.title = rel(1), ndisplay = 50)

{
  # Turn one captured plotLoadings() result into an export-ready data frame:
  # drop the "X." column prefix, lower-case the column names, move the row
  # names into a `compound` column and attach each compound's class and
  # subclass from `heatmap_order` (defined elsewhere in this file).
  format_loadings_sheet <- function(loadings) {
    loadings %>%
      as.data.frame() %>%
      rename_with(~stringr::str_replace(.x, pattern = "X\\.",
        replacement = ""), matches("X.")) %>%
      rename_with(~stringr::str_to_lower(.x)) %>%
      rownames_to_column(var = "compound") %>%
      left_join(heatmap_order %>%
        select(compound, class, subclass)) %>%
      dplyr::relocate(class, subclass, .after = "compound")
  }

  # Diversity, components 1 and 2
  wb1 <- format_loadings_sheet(tot_diversity_loadings_comp1)
  wb2 <- format_loadings_sheet(tot_diversity_loadings_comp2)
  # Enterococcus expansion, components 1 and 2
  wb3 <- format_loadings_sheet(tot_ecoc_loadings_comp1)
  wb4 <- format_loadings_sheet(tot_ecoc_loadings_comp2)
  # Enterobacterales expansion, components 1 and 2
  wb5 <- format_loadings_sheet(tot_ebac_loadings_comp1)
  wb6 <- format_loadings_sheet(tot_ebac_loadings_comp2)
  # Any bacterial infection, components 1 and 2
  wb7 <- format_loadings_sheet(tot_infx_loadings_comp1)
  wb8 <- format_loadings_sheet(tot_infx_loadings_comp2)

  # Define sheet names for each data frame
  wb_names <- list(Diversity_1 = wb1, Diversity_2 = wb2,
    Enterococcus_Expansion_1 = wb3, Enterococcus_Expansion_2 = wb4,
    Enterobacterales_Expansion_1 = wb5, Enterobacterales_Expansion_2 = wb6,
    Infection_1 = wb7, Infection_2 = wb8)

  # Export each data frame to a separate sheet in the same Excel file
  openxlsx::write.xlsx(wb_names,
    file = "./Results/Retrained_Model_sPLSDA_Loadings.xlsx",
    tabColour = c("#EDE342", "#EDE342", "#0C7A3A", "#0C7A3A",
      "#FF0000", "#FF0000", "goldenrod", "goldenrod"),
    zoom = 90)
}

## Vector of variables to summarize
vc_demo_vars <- c(
  # Demographics and severity score
  "race", "sex", "age", "meld_transplant",
  # Liver disease indicators
  "Alcoholic Hepatitis", "Alcoholic Cirrhosis", "NAFLD/NASH",
  "Primary Sclerosing Cholangitis", "Acute Viral Hepatitis",
  "Chronic Hepatitis B", "Chronic Hepatitis C", "Autoimmune",
  "Wilson's Disease", "Alpha-1 Antitrypsin", "Hemachromatosis",
  "Drug Induced Liver Injury or Toxin", "Budd Chiari", "Cryptogenic",
  "Malignancy", "Other",
  # Support measures
  "Dialysis", "Pressers", "Mechanical Ventilation"
)

## Categorical variables that need transformation: every summarized variable
## except the two continuous ones (same values and order as listing them out)
vc_demo_cats <- setdiff(vc_demo_vars, c("age", "meld_transplant"))

# Table 1 for the validation cohort, stratified by infection status
vc_tab1_1 <- CreateTableOne(
  data = vc_demo,
  vars = vc_demo_vars,
  factorVars = vc_demo_cats,
  strata = "any_infection",
  includeNA = TRUE,
  testNonNormal = "kruskal.test"
)

# Age is potentially skewed: it has to be declared non-normal when the table
# is printed
summary(vc_tab1_1)
##
## ### Summary of continuous variables ###
##
## any_infection: 0
## n miss p.miss mean sd median p25 p75 min max skew kurt
## age 28 0 0 51 12 54 43 61 23 71 -0.5 -0.3
## meld_transplant 28 0 0 28 9 28 22 32 14 46 0.3 -0.6
## ------------------------------------------------------------
## any_infection: 1
## n miss p.miss mean sd median p25 p75 min max skew kurt
## age 7 0 0 52 19 59 48 64 15 68 -1.6 1.8
## meld_transplant 7 0 0 26 12 28 20 32 6 43 -0.5 -0.1
##
## p-values
## pNormal pNonNormal
## age 0.8542412 0.3860101
## meld_transplant 0.6861052 0.8042618
##
## Standardize mean differences
## 1 vs 2
## age 0.06662741
## meld_transplant 0.15442682
##
## =======================================================================================
##
## ### Summary of categorical variables ###
##
## any_infection: 0
## var n miss p.miss
## race 28 0 0.0
##
##
##
##
##
##
## sex 28 0 0.0
##
##
## Alcoholic Hepatitis 28 0 0.0
##
##
## Alcoholic Cirrhosis 28 0 0.0
##
##
## NAFLD/NASH 28 0 0.0
##
##
## Primary Sclerosing Cholangitis 28 0 0.0
##
## Acute Viral Hepatitis 28 0 0.0
##
## Chronic Hepatitis B 28 0 0.0
##
## Chronic Hepatitis C 28 0 0.0
##
##
## Autoimmune 28 0 0.0
##
## Wilson's Disease 28 0 0.0
##
## Alpha-1 Antitrypsin 28 0 0.0
##
## Hemachromatosis 28 0 0.0
##
## Drug Induced Liver Injury or Toxin 28 0 0.0
##
## Budd Chiari 28 0 0.0
##
## Cryptogenic 28 0 0.0
##
## Malignancy 28 0 0.0
##
##
## Other 28 0 0.0
##
##
## Dialysis 28 0 0.0
##
##
## Pressers 28 0 0.0
##
##
## Mechanical Ventilation 28 0 0.0
##
##
## level freq percent cum.percent
## American Indian or Alaska Native 0 0.0 0.0
## Black/African-American 3 10.7 10.7
## More than one Race 1 3.6 14.3
## Other Pacific Islander 1 3.6 17.9
## Unknown 3 10.7 28.6
## White 20 71.4 100.0
##
## Female 12 42.9 42.9
## Male 16 57.1 100.0
##
## 0 27 96.4 96.4
## 1 1 3.6 100.0
##
## 0 17 60.7 60.7
## 1 11 39.3 100.0
##
## 0 27 96.4 96.4
## 1 1 3.6 100.0
##
## 0 28 100.0 100.0
##
## 0 28 100.0 100.0
##
## 0 28 100.0 100.0
##
## 0 27 96.4 96.4
## 1 1 3.6 100.0
##
## 0 28 100.0 100.0
##
## 0 28 100.0 100.0
##
## 0 28 100.0 100.0
##
## 0 28 100.0 100.0
##
## 0 28 100.0 100.0
##
## 0 28 100.0 100.0
##
## 0 28 100.0 100.0
##
## 0 27 96.4 96.4
## 1 1 3.6 100.0
##
## 0 25 89.3 89.3
## 1 3 10.7 100.0
##
## 0 16 57.1 57.1
## 1 12 42.9 100.0
##
## 0 21 75.0 75.0
## 1 7 25.0 100.0
##
## 0 25 89.3 89.3
## 1 3 10.7 100.0
##
## ------------------------------------------------------------
## any_infection: 1
## var n miss p.miss
## race 7 0 0.0
##
##
##
##
##
##
## sex 7 0 0.0
##
##
## Alcoholic Hepatitis 7 0 0.0
##
##
## Alcoholic Cirrhosis 7 0 0.0
##
##
## NAFLD/NASH 7 0 0.0
##
##
## Primary Sclerosing Cholangitis 7 0 0.0
##
## Acute Viral Hepatitis 7 0 0.0
##
## Chronic Hepatitis B 7 0 0.0
##
## Chronic Hepatitis C 7 0 0.0
##
##
## Autoimmune 7 0 0.0
##
## Wilson's Disease 7 0 0.0
##
## Alpha-1 Antitrypsin 7 0 0.0
##
## Hemachromatosis 7 0 0.0
##
## Drug Induced Liver Injury or Toxin 7 0 0.0
##
## Budd Chiari 7 0 0.0
##
## Cryptogenic 7 0 0.0
##
## Malignancy 7 0 0.0
##
##
## Other 7 0 0.0
##
##
## Dialysis 7 0 0.0
##
##
## Pressers 7 0 0.0
##
##
## Mechanical Ventilation 7 0 0.0
##
##
## level freq percent cum.percent
## American Indian or Alaska Native 1 14.3 14.3
## Black/African-American 0 0.0 14.3
## More than one Race 0 0.0 14.3
## Other Pacific Islander 0 0.0 14.3
## Unknown 1 14.3 28.6
## White 5 71.4 100.0
##
## Female 4 57.1 57.1
## Male 3 42.9 100.0
##
## 0 7 100.0 100.0
## 1 0 0.0 100.0
##
## 0 4 57.1 57.1
## 1 3 42.9 100.0
##
## 0 7 100.0 100.0
## 1 0 0.0 100.0
##
## 0 7 100.0 100.0
##
## 0 7 100.0 100.0
##
## 0 7 100.0 100.0
##
## 0 6 85.7 85.7
## 1 1 14.3 100.0
##
## 0 7 100.0 100.0
##
## 0 7 100.0 100.0
##
## 0 7 100.0 100.0
##
## 0 7 100.0 100.0
##
## 0 7 100.0 100.0
##
## 0 7 100.0 100.0
##
## 0 7 100.0 100.0
##
## 0 7 100.0 100.0
## 1 0 0.0 100.0
##
## 0 5 71.4 71.4
## 1 2 28.6 100.0
##
## 0 4 57.1 57.1
## 1 3 42.9 100.0
##
## 0 5 71.4 71.4
## 1 2 28.6 100.0
##
## 0 7 100.0 100.0
## 1 0 0.0 100.0
##
##
## p-values
## pApprox pExact
## race 0.3789440 0.5680822
## sex 0.7991226 0.6772171
## Alcoholic Hepatitis 1.0000000 1.0000000
## Alcoholic Cirrhosis 1.0000000 1.0000000
## NAFLD/NASH 1.0000000 1.0000000
## Primary Sclerosing Cholangitis NA NA
## Acute Viral Hepatitis NA NA
## Chronic Hepatitis B NA NA
## Chronic Hepatitis C 0.8555397 0.3647059
## Autoimmune NA NA
## Wilson's Disease NA NA
## Alpha-1 Antitrypsin NA NA
## Hemachromatosis NA NA
## Drug Induced Liver Injury or Toxin NA NA
## Budd Chiari NA NA
## Cryptogenic NA NA
## Malignancy 1.0000000 1.0000000
## Other 0.5459717 0.2557573
## Dialysis 1.0000000 1.0000000
## Pressers 1.0000000 1.0000000
## Mechanical Ventilation 0.8800137 1.0000000
##
## Standardize mean differences
## 1 vs 2
## race 0.88345221
## sex 0.28867513
## Alcoholic Hepatitis 0.27216553
## Alcoholic Cirrhosis 0.07264327
## NAFLD/NASH 0.27216553
## Primary Sclerosing Cholangitis 0.00000000
## Acute Viral Hepatitis 0.00000000
## Chronic Hepatitis B 0.00000000
## Chronic Hepatitis C 0.38254603
## Autoimmune 0.00000000
## Wilson's Disease 0.00000000
## Alpha-1 Antitrypsin 0.00000000
## Hemachromatosis 0.00000000
## Drug Induced Liver Injury or Toxin 0.00000000
## Budd Chiari 0.00000000
## Cryptogenic 0.00000000
## Malignancy 0.27216553
## Other 0.46126560
## Dialysis 0.00000000
## Pressers 0.08071343
## Mechanical Ventilation 0.48989795
# Continuous variables treated as non-normal: they are reported as
# median [IQR] and compared with the non-normal test in the printed table
vc_tableone_skewed <- c("age", "meld_transplant")

# print() shows the formatted table and returns it, so it can be saved below
vc_tab1_2 <- print(
  vc_tab1_1,
  nonnormal = vc_tableone_skewed,
  formatOptions = list(big.mark = ",")
)
## Stratified by any_infection
## 0
## n 28
## race (%)
## American Indian or Alaska Native 0 ( 0.0)
## Black/African-American 3 ( 10.7)
## More than one Race 1 ( 3.6)
## Other Pacific Islander 1 ( 3.6)
## Unknown 3 ( 10.7)
## White 20 ( 71.4)
## sex = Male (%) 16 ( 57.1)
## age (median [IQR]) 54.00 [43.00, 61.25]
## meld_transplant (median [IQR]) 28.50 [21.75, 32.25]
## Alcoholic Hepatitis = 1 (%) 1 ( 3.6)
## Alcoholic Cirrhosis = 1 (%) 11 ( 39.3)
## NAFLD/NASH = 1 (%) 1 ( 3.6)
## Primary Sclerosing Cholangitis = 0 (%) 28 (100.0)
## Acute Viral Hepatitis = 0 (%) 28 (100.0)
## Chronic Hepatitis B = 0 (%) 28 (100.0)
## Chronic Hepatitis C = 1 (%) 1 ( 3.6)
## Autoimmune = 0 (%) 28 (100.0)
## Wilson's Disease = 0 (%) 28 (100.0)
## Alpha-1 Antitrypsin = 0 (%) 28 (100.0)
## Hemachromatosis = 0 (%) 28 (100.0)
## Drug Induced Liver Injury or Toxin = 0 (%) 28 (100.0)
## Budd Chiari = 0 (%) 28 (100.0)
## Cryptogenic = 0 (%) 28 (100.0)
## Malignancy = 1 (%) 1 ( 3.6)
## Other = 1 (%) 3 ( 10.7)
## Dialysis = 1 (%) 12 ( 42.9)
## Pressers = 1 (%) 7 ( 25.0)
## Mechanical Ventilation = 1 (%) 3 ( 10.7)
## Stratified by any_infection
## 1 p
## n 7
## race (%) 0.379
## American Indian or Alaska Native 1 ( 14.3)
## Black/African-American 0 ( 0.0)
## More than one Race 0 ( 0.0)
## Other Pacific Islander 0 ( 0.0)
## Unknown 1 ( 14.3)
## White 5 ( 71.4)
## sex = Male (%) 3 ( 42.9) 0.799
## age (median [IQR]) 59.00 [48.00, 64.50] 0.386
## meld_transplant (median [IQR]) 28.00 [20.50, 32.00] 0.804
## Alcoholic Hepatitis = 1 (%) 0 ( 0.0) 1.000
## Alcoholic Cirrhosis = 1 (%) 3 ( 42.9) 1.000
## NAFLD/NASH = 1 (%) 0 ( 0.0) 1.000
## Primary Sclerosing Cholangitis = 0 (%) 7 (100.0) NA
## Acute Viral Hepatitis = 0 (%) 7 (100.0) NA
## Chronic Hepatitis B = 0 (%) 7 (100.0) NA
## Chronic Hepatitis C = 1 (%) 1 ( 14.3) 0.856
## Autoimmune = 0 (%) 7 (100.0) NA
## Wilson's Disease = 0 (%) 7 (100.0) NA
## Alpha-1 Antitrypsin = 0 (%) 7 (100.0) NA
## Hemachromatosis = 0 (%) 7 (100.0) NA
## Drug Induced Liver Injury or Toxin = 0 (%) 7 (100.0) NA
## Budd Chiari = 0 (%) 7 (100.0) NA
## Cryptogenic = 0 (%) 7 (100.0) NA
## Malignancy = 1 (%) 0 ( 0.0) 1.000
## Other = 1 (%) 2 ( 28.6) 0.546
## Dialysis = 1 (%) 3 ( 42.9) 1.000
## Pressers = 1 (%) 2 ( 28.6) 1.000
## Mechanical Ventilation = 1 (%) 0 ( 0.0) 0.880
## Stratified by any_infection
## test
## n
## race (%)
## American Indian or Alaska Native
## Black/African-American
## More than one Race
## Other Pacific Islander
## Unknown
## White
## sex = Male (%)
## age (median [IQR]) nonnorm
## meld_transplant (median [IQR]) nonnorm
## Alcoholic Hepatitis = 1 (%)
## Alcoholic Cirrhosis = 1 (%)
## NAFLD/NASH = 1 (%)
## Primary Sclerosing Cholangitis = 0 (%)
## Acute Viral Hepatitis = 0 (%)
## Chronic Hepatitis B = 0 (%)
## Chronic Hepatitis C = 1 (%)
## Autoimmune = 0 (%)
## Wilson's Disease = 0 (%)
## Alpha-1 Antitrypsin = 0 (%)
## Hemachromatosis = 0 (%)
## Drug Induced Liver Injury or Toxin = 0 (%)
## Budd Chiari = 0 (%)
## Cryptogenic = 0 (%)
## Malignancy = 1 (%)
## Other = 1 (%)
## Dialysis = 1 (%)
## Pressers = 1 (%)
## Mechanical Ventilation = 1 (%)
# Round-trip the printed table through a CSV file: reading it back yields a
# data frame, which makes the p-values easy to adjust.
write.csv(
  vc_tab1_2,
  "./Results/Validation_Cohort_Demo_Table_1.csv",
  row.names = TRUE
)

# Step 1: reload the table and give the columns display names
vc_tab1_2_padjust1 <- read.csv("./Results/Validation_Cohort_Demo_Table_1.csv") %>%
  dplyr::rename(
    ` ` = X,
    `No Infection` = X0,
    `Bacterial Infection` = X1
  )

# Step 2: remember the original row order as factor levels
vc_tab1_2_padjust2 <- vc_tab1_2_padjust1 %>%
  mutate(` ` = factor(` `, levels = vc_tab1_2_padjust1$` `))

# Step 3: label rows that have a p-value but no test name as "chi.sq",
# BH-adjust the p-values within each test, restore the original row order and
# blank out missing p-values for display
vc_tab1_2_padjust3 <- vc_tab1_2_padjust1 %>%
  mutate(test = ifelse(!is.na(p) & test == "", "chi.sq", test)) %>%
  group_by(test) %>%
  rstatix::adjust_pvalue(p.col = "p", method = "BH") %>%
  ungroup() %>%
  mutate(` ` = factor(` `, vc_tab1_2_padjust2$` `)) %>%
  arrange(` `) %>%
  mutate(across(c(p, p.adj), ~ ifelse(is.na(.x), "", .x)))

# Save the table with the adjusted p-values appended
write.csv(
  vc_tab1_2_padjust3,
  "./Results/Validation_Cohort_Demo_Table_1_padjust.csv",
  row.names = FALSE
)

## Vector of variables to summarize
vc_abx_vars <- c("Basiliximab", "Mycophenolate", "Steroid", "Systemic Vancomycin",
  "Tacrolimus", "Cefepime", "Metronidazole", "Piperacillin/Tazobactam",
  "Rifaximin", "Ceftriaxone", "Ciprofloxacin", "Gentamicin",
  "Tobramicin", "Daptomycin", "Meropenem", "Oral Vancomycin")

# Build a one-row-per-patient table of 0/1 exposure flags, one column per
# immunosuppressant / antibiotic label, from the medication records in vc_abx.
# `TRUE` is spelled out everywhere: the shorthand `T` is an ordinary variable
# that can be reassigned.
vc_abx2 <- vc_abx %>%
  # Keep immunosuppressant/steroid/lactulose records plus any record whose
  # action contains "given"
  filter(
    grepl(pattern = "basilix|tacro|steroid|mycophenolate|lactulose",
      medication_name, ignore.case = TRUE) |
      grepl(pattern = "GLUCOCORTICOIDS|steroid", pharm_class,
        ignore.case = TRUE) |
      grepl(pattern = "steroid", pharm_sub_class, ignore.case = TRUE) |
      grepl("given", mar_action, ignore.case = TRUE)
  ) %>%
  mutate(
    # NOTE(review): the steroid pattern "one|ide" matches ANY medication name
    # containing those letters (e.g. "ceftriaxone", "... chloride"), so
    # non-steroid records can be labelled "Steroid" -- confirm this is
    # intended or tighten the pattern.
    Immunosuppressants = case_when(
      grepl("basilix", medication_name, ignore.case = TRUE) ~ "Basiliximab",
      grepl("tacro", medication_name, ignore.case = TRUE) ~ "Tacrolimus",
      grepl("one|ide|solu-cortef", medication_name,
        ignore.case = TRUE) ~ "Steroid",
      grepl("mycophenolate", medication_name,
        ignore.case = TRUE) ~ "Mycophenolate",
      grepl("lactulose", medication_name, ignore.case = TRUE) ~ "Lactulose"
    ),
    Antibiotics = case_when(
      grepl("rifaximin", medication_name, ignore.case = TRUE) ~ "Rifaximin",
      grepl("lactulose", medication_name, ignore.case = TRUE) ~ "Lactulose",
      grepl("ceftriaxone", medication_name,
        ignore.case = TRUE) ~ "Ceftriaxone",
      grepl("piperacillin|tazobactam", medication_name,
        ignore.case = TRUE) ~ "Piperacillin/Tazobactam",
      grepl("cefepime", medication_name, ignore.case = TRUE) ~ "Cefepime",
      grepl("meropenem", medication_name, ignore.case = TRUE) ~ "Meropenem",
      grepl("gentamicin", medication_name, ignore.case = TRUE) ~ "Gentamicin",
      grepl("tobramycin", medication_name, ignore.case = TRUE) ~ "Tobramicin",
      grepl("vancomycin.+oral", medication_name,
        ignore.case = TRUE) ~ "Oral Vancomycin",
      grepl("vancomycin.+(IV|Intravenous)", medication_name,
        ignore.case = TRUE) ~ "Systemic Vancomycin",
      grepl("METRONIDAZOLE", medication_name,
        ignore.case = TRUE) ~ "Metronidazole",
      grepl("DAPTOMYCIN", medication_name, ignore.case = TRUE) ~ "Daptomycin",
      grepl("linezolid", medication_name, ignore.case = TRUE) ~ "Linezolid",
      grepl("fluconazole", medication_name,
        ignore.case = TRUE) ~ "Fluconazole",
      grepl("micafungin", medication_name, ignore.case = TRUE) ~ "Micafungin",
      # Ciprofloxacin, excluding records dosed in drops
      grepl("cipro", medication_name, ignore.case = TRUE) &
        !grepl("drop", dose_units, ignore.case = TRUE) ~ "Ciprofloxacin"
    ),
    # Immunosuppressants count from day 0 to 30 relative to transplant (or
    # when ordered as outpatient); antibiotics count from day -14 to 1.
    # NOTE(review): "keep" is decided per record, so a record kept through
    # one label also contributes its other label -- confirm this is intended.
    action = case_when(
      (!is.na(Immunosuppressants) & between(days_transplant, 0, 30)) |
        (!is.na(Immunosuppressants) & ordering_mode == "Outpatient") ~ "keep",
      !is.na(Antibiotics) & between(days_transplant, -14, 1) ~ "keep",
      TRUE ~ "remove"
    )
  ) %>%
  # Earliest kept record per patient and label combination
  group_by(patientID, Immunosuppressants, Antibiotics) %>%
  arrange(days_transplant) %>%
  filter(action == "keep") %>%
  dplyr::slice(1) %>%
  select(patientID, bact_infection_present, Immunosuppressants,
    Antibiotics) %>%
  # left_join(peri_criteria_all %>% select(patientID,
  # any_infection)) %>%
  # Long form (one row per patient and label), then wide 0/1 indicator columns
  pivot_longer(!c(patientID, bact_infection_present), names_to = "variable",
    values_to = "value") %>%
  drop_na(value) %>%
  mutate(variable = 1) %>%
  pivot_wider(id_cols = c(patientID, bact_infection_present),
    names_from = "value", values_from = "variable", values_fn = min) %>%
  replace(is.na(.), 0) %>%
  mutate(
    bact_infection_present = recode(bact_infection_present,
      No = "No Infection", Yes = "Bacterial Infection"),
    bact_infection_present = factor(bact_infection_present,
      levels = c("No Infection", "Bacterial Infection"))
  )

# Table 1 of medication exposure, stratified by infection status
vc_abx_tab1_1 <- CreateTableOne(vars = vc_abx_vars, testNonNormal = "kruskal.test",
  includeNA = FALSE, factorVars = vc_abx_vars, strata = "bact_infection_present",
  data = vc_abx2)
summary(vc_abx_tab1_1)
##
## ### Summary of categorical variables ###
##
## bact_infection_present: No Infection
## var n miss p.miss level freq percent cum.percent
## Basiliximab 28 0 0.0 0 11 39.3 39.3
## 1 17 60.7 100.0
##
## Mycophenolate 28 0 0.0 0 4 14.3 14.3
## 1 24 85.7 100.0
##
## Steroid 28 0 0.0 1 28 100.0 100.0
##
## Systemic Vancomycin 28 0 0.0 0 10 35.7 35.7
## 1 18 64.3 100.0
##
## Tacrolimus 28 0 0.0 1 28 100.0 100.0
##
## Cefepime 28 0 0.0 0 16 57.1 57.1
## 1 12 42.9 100.0
##
## Metronidazole 28 0 0.0 0 10 35.7 35.7
## 1 18 64.3 100.0
##
## Piperacillin/Tazobactam 28 0 0.0 0 5 17.9 17.9
## 1 23 82.1 100.0
##
## Rifaximin 28 0 0.0 0 12 42.9 42.9
## 1 16 57.1 100.0
##
## Ceftriaxone 28 0 0.0 0 22 78.6 78.6
## 1 6 21.4 100.0
##
## Ciprofloxacin 28 0 0.0 0 21 75.0 75.0
## 1 7 25.0 100.0
##
## Gentamicin 28 0 0.0 0 27 96.4 96.4
## 1 1 3.6 100.0
##
## Tobramicin 28 0 0.0 0 22 78.6 78.6
## 1 6 21.4 100.0
##
## Daptomycin 28 0 0.0 0 27 96.4 96.4
## 1 1 3.6 100.0
##
## Meropenem 28 0 0.0 0 28 100.0 100.0
## 1 0 0.0 100.0
##
## Oral Vancomycin 28 0 0.0 0 27 96.4 96.4
## 1 1 3.6 100.0
##
## ------------------------------------------------------------
## bact_infection_present: Bacterial Infection
## var n miss p.miss level freq percent cum.percent
## Basiliximab 7 0 0.0 0 2 28.6 28.6
## 1 5 71.4 100.0
##
## Mycophenolate 7 0 0.0 0 1 14.3 14.3
## 1 6 85.7 100.0
##
## Steroid 7 0 0.0 1 7 100.0 100.0
##
## Systemic Vancomycin 7 0 0.0 0 1 14.3 14.3
## 1 6 85.7 100.0
##
## Tacrolimus 7 0 0.0 1 7 100.0 100.0
##
## Cefepime 7 0 0.0 0 5 71.4 71.4
## 1 2 28.6 100.0
##
## Metronidazole 7 0 0.0 0 5 71.4 71.4
## 1 2 28.6 100.0
##
## Piperacillin/Tazobactam 7 0 0.0 0 2 28.6 28.6
## 1 5 71.4 100.0
##
## Rifaximin 7 0 0.0 0 2 28.6 28.6
## 1 5 71.4 100.0
##
## Ceftriaxone 7 0 0.0 0 4 57.1 57.1
## 1 3 42.9 100.0
##
## Ciprofloxacin 7 0 0.0 0 6 85.7 85.7
## 1 1 14.3 100.0
##
## Gentamicin 7 0 0.0 0 7 100.0 100.0
## 1 0 0.0 100.0
##
## Tobramicin 7 0 0.0 0 3 42.9 42.9
## 1 4 57.1 100.0
##
## Daptomycin 7 0 0.0 0 6 85.7 85.7
## 1 1 14.3 100.0
##
## Meropenem 7 0 0.0 0 6 85.7 85.7
## 1 1 14.3 100.0
##
## Oral Vancomycin 7 0 0.0 0 7 100.0 100.0
## 1 0 0.0 100.0
##
##
## p-values
## pApprox pExact
## Basiliximab 0.9303088 0.6888863
## Mycophenolate 1.0000000 1.0000000
## Steroid NA NA
## Systemic Vancomycin 0.5240000 0.3916280
## Tacrolimus NA NA
## Cefepime 0.7958092 0.6760304
## Metronidazole 0.2002397 0.1122199
## Piperacillin/Tazobactam 0.9158646 0.6078263
## Rifaximin 0.7958092 0.6760304
## Ceftriaxone 0.4985291 0.3397075
## Ciprofloxacin 0.9198406 1.0000000
## Gentamicin 1.0000000 1.0000000
## Tobramicin 0.1605806 0.1553524
## Daptomycin 0.8555397 0.3647059
## Meropenem 0.4466872 0.2000000
## Oral Vancomycin 1.0000000 1.0000000
##
## Standardize mean differences
## 1 vs 2
## Basiliximab 0.2277569
## Mycophenolate 0.0000000
## Steroid 0.0000000
## Systemic Vancomycin 0.5107539
## Tacrolimus 0.0000000
## Cefepime 0.3015113
## Metronidazole 0.7669650
## Piperacillin/Tazobactam 0.2558409
## Rifaximin 0.3015113
## Ceftriaxone 0.4714045
## Ciprofloxacin 0.2721655
## Gentamicin 0.2721655
## Tobramicin 0.7856742
## Daptomycin 0.3825460
## Meropenem 0.5773503
## Oral Vancomycin 0.2721655
## Stratified by bact_infection_present
## No Infection Bacterial Infection p test
## n 28 7
## Basiliximab = 1 (%) 17 ( 60.7) 5 ( 71.4) 0.930
## Mycophenolate = 1 (%) 24 ( 85.7) 6 ( 85.7) 1.000
## Steroid = 1 (%) 28 (100.0) 7 (100.0) NA
## Systemic Vancomycin = 1 (%) 18 ( 64.3) 6 ( 85.7) 0.524
## Tacrolimus = 1 (%) 28 (100.0) 7 (100.0) NA
## Cefepime = 1 (%) 12 ( 42.9) 2 ( 28.6) 0.796
## Metronidazole = 1 (%) 18 ( 64.3) 2 ( 28.6) 0.200
## Piperacillin/Tazobactam = 1 (%) 23 ( 82.1) 5 ( 71.4) 0.916
## Rifaximin = 1 (%) 16 ( 57.1) 5 ( 71.4) 0.796
## Ceftriaxone = 1 (%) 6 ( 21.4) 3 ( 42.9) 0.499
## Ciprofloxacin = 1 (%) 7 ( 25.0) 1 ( 14.3) 0.920
## Gentamicin = 1 (%) 1 ( 3.6) 0 ( 0.0) 1.000
## Tobramicin = 1 (%) 6 ( 21.4) 4 ( 57.1) 0.161
## Daptomycin = 1 (%) 1 ( 3.6) 1 ( 14.3) 0.856
## Meropenem = 1 (%) 0 ( 0.0) 1 ( 14.3) 0.447
## Oral Vancomycin = 1 (%) 1 ( 3.6) 0 ( 0.0) 1.000
# `vc_abx_tab1_2` was written to disk without ever being assigned in this
# section, although its printed output appears above. Rebuild it from
# `vc_abx_tab1_1` the same way the demographics table is built.
# printToggle = FALSE returns the formatted table without printing it again.
# NOTE(review): confirm these print options match the call that produced the
# output shown above.
vc_abx_tab1_2 <- print(vc_abx_tab1_1, formatOptions = list(big.mark = ","),
  printToggle = FALSE)

# Round-trip the printed table through a CSV file: reading it back yields a
# data frame, which makes the p-values easy to adjust.
write.csv(vc_abx_tab1_2, "./Results/Validation_Cohort_ABX_Table_1.csv",
  row.names = TRUE)

# Step 1: reload the table and give the columns display names
vc_abx_tab1_2_padjust1 <- read.csv("./Results/Validation_Cohort_ABX_Table_1.csv") %>%
  dplyr::rename(` ` = X, `Bacterial Infection` = Bacterial.Infection,
    `No Infection` = No.Infection)

# Step 2: remember the original row order as factor levels
vc_abx_tab1_2_padjust2 <- vc_abx_tab1_2_padjust1 %>%
  mutate(` ` = factor(` `, levels = vc_abx_tab1_2_padjust1$` `))

# Step 3: label rows that have a p-value (and no test name) as "chi.sq",
# BH-adjust within each test label, restore the original row order and blank
# out missing p-values for display
vc_abx_tab1_2_padjust3 <- vc_abx_tab1_2_padjust1 %>%
  mutate(test = ifelse(!is.na(p) & is.na(test), "chi.sq", "")) %>%
  group_by(test) %>%
  rstatix::adjust_pvalue(p.col = "p", method = "BH") %>%
  ungroup() %>%
  mutate(` ` = factor(` `, vc_abx_tab1_2_padjust2$` `)) %>%
  arrange(` `) %>%
  mutate(p = ifelse(is.na(p), "", p), p.adj = ifelse(is.na(p.adj),
    "", p.adj))

# Save the table with the adjusted p-values appended
write.csv(vc_abx_tab1_2_padjust3, "./Results/Validation_Cohort_ABX_Table_1_padjust.csv",
  row.names = FALSE)

# Stack the validation and original cohort demographics, tagging each row
# with its cohort. This statement used to be fused onto the end of the
# write.csv() call above, which R cannot parse.
demo_cohorts <- vc_demo %>%
  mutate(cohort = "Validation") %>%
  bind_rows(demo %>%
    mutate(cohort = "Original"))
# Table 1 comparing the original and validation cohorts. The variable lists
# demo_vars / demo_cats are defined elsewhere in this file.
cohorts_tab1_1 <- CreateTableOne(
  data = demo_cohorts,
  vars = demo_vars,
  factorVars = demo_cats,
  strata = "cohort",
  includeNA = TRUE,
  testNonNormal = "kruskal.test"
)

# Age is potentially skewed: it has to be declared non-normal when the table
# is printed
summary(cohorts_tab1_1)
##
## ### Summary of continuous variables ###
##
## cohort: Original
## n miss p.miss mean sd median p25 p75 min max skew kurt
## age 107 0 0 52 16 56 42 64 2 77 -0.99 0.7
## meld_transplant 107 0 0 26 10 29 19 33 6 49 -0.09 -0.8
## ------------------------------------------------------------
## cohort: Validation
## n miss p.miss mean sd median p25 p75 min max skew kurt
## age 35 0 0 52 13 55 43 62 15 71 -0.87 0.3
## meld_transplant 35 0 0 27 10 28 22 32 6 46 -0.01 -0.4
##
## p-values
## pNormal pNonNormal
## age 0.8978311 0.5811413
## meld_transplant 0.6659789 0.8182777
##
## Standardize mean differences
## 1 vs 2
## age 0.02619913
## meld_transplant 0.08548448
##
## =======================================================================================
##
## ### Summary of categorical variables ###
##
## cohort: Original
## var n miss p.miss
## race 107 0 0.0
##
##
##
##
##
##
##
##
## sex 107 0 0.0
##
##
## Alcoholic Hepatitis 107 0 0.0
##
##
## Alcoholic Cirrhosis 107 0 0.0
##
##
## NAFLD/NASH 107 0 0.0
##
##
## Primary Sclerosing Cholangitis 107 0 0.0
##
##
## Acute Viral Hepatitis 107 0 0.0
##
##
## Chronic Hepatitis B 107 0 0.0
##
##
## Chronic Hepatitis C 107 0 0.0
##
##
## Autoimmune 107 0 0.0
##
##
## Wilson's Disease 107 0 0.0
##
##
## Alpha-1 Antitrypsin 107 0 0.0
##
## Hemachromatosis 107 0 0.0
##
##
## Drug Induced Liver Injury or Toxin 107 0 0.0
##
##
## Budd Chiari 107 0 0.0
##
## Cryptogenic 107 0 0.0
##
##
## Malignancy 107 0 0.0
##
##
## Other 107 0 0.0
##
##
## Dialysis 107 0 0.0
##
##
## Pressers 107 0 0.0
##
##
## Mechanical Ventilation 107 0 0.0
##
##
## level freq percent cum.percent
## American Indian or Alaska Native 1 0.9 0.9
## Asian/Mideast Indian 8 7.5 8.4
## Black/African-American 11 10.3 18.7
## More than one Race 10 9.3 28.0
## Other Pacific Islander 0 0.0 28.0
## Patient Declined 5 4.7 32.7
## Unknown 2 1.9 34.6
## White 70 65.4 100.0
##
## Female 47 43.9 43.9
## Male 60 56.1 100.0
##
## 0 99 92.5 92.5
## 1 8 7.5 100.0
##
## 0 59 55.1 55.1
## 1 48 44.9 100.0
##
## 0 91 85.0 85.0
## 1 16 15.0 100.0
##
## 0 101 94.4 94.4
## 1 6 5.6 100.0
##
## 0 103 96.3 96.3
## 1 4 3.7 100.0
##
## 0 106 99.1 99.1
## 1 1 0.9 100.0
##
## 0 103 96.3 96.3
## 1 4 3.7 100.0
##
## 0 102 95.3 95.3
## 1 5 4.7 100.0
##
## 0 104 97.2 97.2
## 1 3 2.8 100.0
##
## 0 107 100.0 100.0
##
## 0 106 99.1 99.1
## 1 1 0.9 100.0
##
## 0 106 99.1 99.1
## 1 1 0.9 100.0
##
## 0 107 100.0 100.0
##
## 0 102 95.3 95.3
## 1 5 4.7 100.0
##
## 0 84 78.5 78.5
## 1 23 21.5 100.0
##
## 0 94 87.9 87.9
## 1 13 12.1 100.0
##
## 0 76 71.0 71.0
## 1 31 29.0 100.0
##
## 0 96 89.7 89.7
## 1 11 10.3 100.0
##
## 0 100 93.5 93.5
## 1 7 6.5 100.0
##
## ------------------------------------------------------------
## cohort: Validation
## var n miss p.miss
## race 35 0 0.0
##
##
##
##
##
##
##
##
## sex 35 0 0.0
##
##
## Alcoholic Hepatitis 35 0 0.0
##
##
## Alcoholic Cirrhosis 35 0 0.0
##
##
## NAFLD/NASH 35 0 0.0
##
##
## Primary Sclerosing Cholangitis 35 0 0.0
##
##
## Acute Viral Hepatitis 35 0 0.0
##
##
## Chronic Hepatitis B 35 0 0.0
##
##
## Chronic Hepatitis C 35 0 0.0
##
##
## Autoimmune 35 0 0.0
##
##
## Wilson's Disease 35 0 0.0
##
##
## Alpha-1 Antitrypsin 35 0 0.0
##
## Hemachromatosis 35 0 0.0
##
##
## Drug Induced Liver Injury or Toxin 35 0 0.0
##
##
## Budd Chiari 35 0 0.0
##
## Cryptogenic 35 0 0.0
##
##
## Malignancy 35 0 0.0
##
##
## Other 35 0 0.0
##
##
## Dialysis 35 0 0.0
##
##
## Pressers 35 0 0.0
##
##
## Mechanical Ventilation 35 0 0.0
##
##
## level freq percent cum.percent
## American Indian or Alaska Native 1 2.9 2.9
## Asian/Mideast Indian 0 0.0 2.9
## Black/African-American 3 8.6 11.4
## More than one Race 1 2.9 14.3
## Other Pacific Islander 1 2.9 17.1
## Patient Declined 0 0.0 17.1
## Unknown 4 11.4 28.6
## White 25 71.4 100.0
##
## Female 16 45.7 45.7
## Male 19 54.3 100.0
##
## 0 34 97.1 97.1
## 1 1 2.9 100.0
##
## 0 21 60.0 60.0
## 1 14 40.0 100.0
##
## 0 34 97.1 97.1
## 1 1 2.9 100.0
##
## 0 35 100.0 100.0
## 1 0 0.0 100.0
##
## 0 35 100.0 100.0
## 1 0 0.0 100.0
##
## 0 35 100.0 100.0
## 1 0 0.0 100.0
##
## 0 33 94.3 94.3
## 1 2 5.7 100.0
##
## 0 35 100.0 100.0
## 1 0 0.0 100.0
##
## 0 35 100.0 100.0
## 1 0 0.0 100.0
##
## 0 35 100.0 100.0
##
## 0 35 100.0 100.0
## 1 0 0.0 100.0
##
## 0 35 100.0 100.0
## 1 0 0.0 100.0
##
## 0 35 100.0 100.0
##
## 0 35 100.0 100.0
## 1 0 0.0 100.0
##
## 0 34 97.1 97.1
## 1 1 2.9 100.0
##
## 0 30 85.7 85.7
## 1 5 14.3 100.0
##
## 0 20 57.1 57.1
## 1 15 42.9 100.0
##
## 0 26 74.3 74.3
## 1 9 25.7 100.0
##
## 0 32 91.4 91.4
## 1 3 8.6 100.0
##
##
## p-values
## pApprox pExact
## race 0.03166089 0.030261150
## sex 1.00000000 1.000000000
## Alcoholic Hepatitis 0.56591461 0.452490423
## Alcoholic Cirrhosis 0.75891760 0.696336171
## NAFLD/NASH 0.10660916 0.071552735
## Primary Sclerosing Cholangitis 0.34337432 0.336196590
## Acute Viral Hepatitis 0.56740960 0.572085814
## Chronic Hepatitis B 1.00000000 1.000000000
## Chronic Hepatitis C 0.98368436 0.636320472
## Autoimmune 0.43907269 0.333322101
## Wilson's Disease 0.74577750 1.000000000
## Alpha-1 Antitrypsin NA NA
## Hemachromatosis 1.00000000 1.000000000
## Drug Induced Liver Injury or Toxin 1.00000000 1.000000000
## Budd Chiari NA NA
## Cryptogenic 0.43907269 0.333322101
## Malignancy 0.02177662 0.008768824
## Other 0.97040937 0.772171814
## Dialysis 0.18828168 0.147658755
## Pressers 0.04564895 0.045954898
## Mechanical Ventilation 0.97862090 0.708021103
##
## Standardize mean differences
## 1 vs 2
## race 0.78541034
## sex 0.03598046
## Alcoholic Hepatitis 0.20983452
## Alcoholic Cirrhosis 0.09844884
## NAFLD/NASH 0.43460701
## Primary Sclerosing Cholangitis 0.34469099
## Acute Viral Hepatitis 0.27869321
## Chronic Hepatitis B 0.13736056
## Chronic Hepatitis C 0.09321888
## Autoimmune 0.31311215
## Wilson's Disease 0.24019223
## Alpha-1 Antitrypsin 0.00000000
## Hemachromatosis 0.13736056
## Drug Induced Liver Injury or Toxin 0.13736056
## Budd Chiari 0.00000000
## Cryptogenic 0.31311215
## Malignancy 0.59461176
## Other 0.06310471
## Dialysis 0.29250434
## Pressers 0.41011137
## Mechanical Ventilation 0.07683819
# Continuous variables passed to print() as `nonnormal`
cohorts_tableone_skewed <- c("age", "meld_transplant")

# Print the cohort-stratified table and keep the value returned by print()
cohorts_tab1_2 <- print(
  cohorts_tab1_1,
  formatOptions = list(big.mark = ","),
  nonnormal = cohorts_tableone_skewed
)
## Stratified by cohort
## Original
## n 107
## race (%)
## American Indian or Alaska Native 1 ( 0.9)
## Asian/Mideast Indian 8 ( 7.5)
## Black/African-American 11 ( 10.3)
## More than one Race 10 ( 9.3)
## Other Pacific Islander 0 ( 0.0)
## Patient Declined 5 ( 4.7)
## Unknown 2 ( 1.9)
## White 70 ( 65.4)
## sex = Male (%) 60 ( 56.1)
## age (median [IQR]) 56.00 [41.50, 64.00]
## meld_transplant (median [IQR]) 29.00 [19.00, 33.00]
## Alcoholic Hepatitis = 1 (%) 8 ( 7.5)
## Alcoholic Cirrhosis = 1 (%) 48 ( 44.9)
## NAFLD/NASH = 1 (%) 16 ( 15.0)
## Primary Sclerosing Cholangitis = 1 (%) 6 ( 5.6)
## Acute Viral Hepatitis = 1 (%) 4 ( 3.7)
## Chronic Hepatitis B = 1 (%) 1 ( 0.9)
## Chronic Hepatitis C = 1 (%) 4 ( 3.7)
## Autoimmune = 1 (%) 5 ( 4.7)
## Wilson's Disease = 1 (%) 3 ( 2.8)
## Alpha-1 Antitrypsin = 0 (%) 107 (100.0)
## Hemachromatosis = 1 (%) 1 ( 0.9)
## Drug Induced Liver Injury or Toxin = 1 (%) 1 ( 0.9)
## Budd Chiari = 0 (%) 107 (100.0)
## Cryptogenic = 1 (%) 5 ( 4.7)
## Malignancy = 1 (%) 23 ( 21.5)
## Other = 1 (%) 13 ( 12.1)
## Dialysis = 1 (%) 31 ( 29.0)
## Pressers = 1 (%) 11 ( 10.3)
## Mechanical Ventilation = 1 (%) 7 ( 6.5)
## Stratified by cohort
## Validation p
## n 35
## race (%) 0.032
## American Indian or Alaska Native 1 ( 2.9)
## Asian/Mideast Indian 0 ( 0.0)
## Black/African-American 3 ( 8.6)
## More than one Race 1 ( 2.9)
## Other Pacific Islander 1 ( 2.9)
## Patient Declined 0 ( 0.0)
## Unknown 4 ( 11.4)
## White 25 ( 71.4)
## sex = Male (%) 19 ( 54.3) 1.000
## age (median [IQR]) 55.00 [43.00, 62.00] 0.581
## meld_transplant (median [IQR]) 28.00 [21.50, 32.50] 0.818
## Alcoholic Hepatitis = 1 (%) 1 ( 2.9) 0.566
## Alcoholic Cirrhosis = 1 (%) 14 ( 40.0) 0.759
## NAFLD/NASH = 1 (%) 1 ( 2.9) 0.107
## Primary Sclerosing Cholangitis = 1 (%) 0 ( 0.0) 0.343
## Acute Viral Hepatitis = 1 (%) 0 ( 0.0) 0.567
## Chronic Hepatitis B = 1 (%) 0 ( 0.0) 1.000
## Chronic Hepatitis C = 1 (%) 2 ( 5.7) 0.984
## Autoimmune = 1 (%) 0 ( 0.0) 0.439
## Wilson's Disease = 1 (%) 0 ( 0.0) 0.746
## Alpha-1 Antitrypsin = 0 (%) 35 (100.0) NA
## Hemachromatosis = 1 (%) 0 ( 0.0) 1.000
## Drug Induced Liver Injury or Toxin = 1 (%) 0 ( 0.0) 1.000
## Budd Chiari = 0 (%) 35 (100.0) NA
## Cryptogenic = 1 (%) 0 ( 0.0) 0.439
## Malignancy = 1 (%) 1 ( 2.9) 0.022
## Other = 1 (%) 5 ( 14.3) 0.970
## Dialysis = 1 (%) 15 ( 42.9) 0.188
## Pressers = 1 (%) 9 ( 25.7) 0.046
## Mechanical Ventilation = 1 (%) 3 ( 8.6) 0.979
## Stratified by cohort
## test
## n
## race (%)
## American Indian or Alaska Native
## Asian/Mideast Indian
## Black/African-American
## More than one Race
## Other Pacific Islander
## Patient Declined
## Unknown
## White
## sex = Male (%)
## age (median [IQR]) nonnorm
## meld_transplant (median [IQR]) nonnorm
## Alcoholic Hepatitis = 1 (%)
## Alcoholic Cirrhosis = 1 (%)
## NAFLD/NASH = 1 (%)
## Primary Sclerosing Cholangitis = 1 (%)
## Acute Viral Hepatitis = 1 (%)
## Chronic Hepatitis B = 1 (%)
## Chronic Hepatitis C = 1 (%)
## Autoimmune = 1 (%)
## Wilson's Disease = 1 (%)
## Alpha-1 Antitrypsin = 0 (%)
## Hemachromatosis = 1 (%)
## Drug Induced Liver Injury or Toxin = 1 (%)
## Budd Chiari = 0 (%)
## Cryptogenic = 1 (%)
## Malignancy = 1 (%)
## Other = 1 (%)
## Dialysis = 1 (%)
## Pressers = 1 (%)
## Mechanical Ventilation = 1 (%)
# Export the two-cohort demographics table. Saving and then re-reading the
# same data is an easy way to adjust the p-values, because read.csv loads the
# object back as a data frame.
write.csv(cohorts_tab1_2, "./Results/Both_Cohorts_Demo_Table_1.csv",
  row.names = TRUE)

# The p-values need to be adjusted and the rows put back in their original
# order, hence the multiple data frames below.

# Step 1: re-import the table; the row-label column arrives as `X` and is
# renamed to a blank header.
cohorts_tab1_2_padjust1 <- read.csv("./Results/Both_Cohorts_Demo_Table_1.csv") %>%
  dplyr::rename(` ` = X)

# Step 2: turn the row labels into a factor whose levels follow the file
# order; step 3 uses it to restore that order after the grouped adjustment.
cohorts_tab1_2_padjust2 <- cohorts_tab1_2_padjust1 %>%
  mutate(` ` = factor(` `, levels = cohorts_tab1_2_padjust1$` `))

# Step 3: rows that carry a p-value but an empty test label are tagged
# "chi.sq" (other labels are kept), p-values are adjusted (method "BH")
# within each test label, rows are re-sorted and missing values blanked out.
cohorts_tab1_2_padjust3 <- cohorts_tab1_2_padjust1 %>%
  mutate(test = ifelse(!is.na(p) & test == "", "chi.sq", test)) %>%
  group_by(test) %>%
  rstatix::adjust_pvalue(p.col = "p", method = "BH") %>%
  ungroup() %>%
  mutate(` ` = factor(` `, cohorts_tab1_2_padjust2$` `)) %>%
  arrange(` `) %>%
  mutate(p = ifelse(is.na(p), "", p), p.adj = ifelse(is.na(p.adj),
    "", p.adj))

# Write the table again, now with the adjusted p-values appended
write.csv(cohorts_tab1_2_padjust3, "./Results/Both_Cohorts_Demo_Table_1_padjust.csv",
  row.names = FALSE)

# Stack the validation and original cohort medication data into one data
# frame, tagging every row with the cohort it came from.
cohorts_abx <- vc_abx2 %>%
  mutate(cohort = "Validation") %>%
  bind_rows(abx2 %>%
    mutate(cohort = "Original"))

# Medication table stratified by cohort; every variable in `abx_vars` is
# treated as categorical.
cohorts_abx_tab1_1 <- CreateTableOne(vars = abx_vars, testNonNormal = "kruskal.test",
  includeNA = FALSE, factorVars = abx_vars, strata = "cohort",
  data = cohorts_abx)
summary(cohorts_abx_tab1_1)
##
## ### Summary of categorical variables ###
##
## cohort: Original
## var n miss p.miss level freq percent cum.percent
## Basiliximab 107 0 0.0 0 37 34.6 34.6
## 1 70 65.4 100.0
##
## Mycophenolate 107 0 0.0 0 21 19.6 19.6
## 1 86 80.4 100.0
##
## Steroid 107 0 0.0 1 107 100.0 100.0
##
## Systemic Vancomycin 107 0 0.0 0 44 41.1 41.1
## 1 63 58.9 100.0
##
## Tacrolimus 107 0 0.0 0 1 0.9 0.9
## 1 106 99.1 100.0
##
## Cefepime 107 0 0.0 0 65 60.7 60.7
## 1 42 39.3 100.0
##
## Metronidazole 107 0 0.0 0 58 54.2 54.2
## 1 49 45.8 100.0
##
## Piperacillin/Tazobactam 107 0 0.0 0 14 13.1 13.1
## 1 93 86.9 100.0
##
## Rifaximin 107 0 0.0 0 54 50.5 50.5
## 1 53 49.5 100.0
##
## Ceftriaxone 107 0 0.0 0 81 75.7 75.7
## 1 26 24.3 100.0
##
## Ciprofloxacin 107 0 0.0 0 87 81.3 81.3
## 1 20 18.7 100.0
##
## Gentamicin 107 0 0.0 0 105 98.1 98.1
## 1 2 1.9 100.0
##
## Tobramicin 107 0 0.0 0 100 93.5 93.5
## 1 7 6.5 100.0
##
## Daptomycin 107 0 0.0 0 103 96.3 96.3
## 1 4 3.7 100.0
##
## Meropenem 107 0 0.0 0 102 95.3 95.3
## 1 5 4.7 100.0
##
## Oral Vancomycin 107 0 0.0 0 104 97.2 97.2
## 1 3 2.8 100.0
##
## ------------------------------------------------------------
## cohort: Validation
## var n miss p.miss level freq percent cum.percent
## Basiliximab 35 0 0.0 0 13 37.1 37.1
## 1 22 62.9 100.0
##
## Mycophenolate 35 0 0.0 0 5 14.3 14.3
## 1 30 85.7 100.0
##
## Steroid 35 0 0.0 1 35 100.0 100.0
##
## Systemic Vancomycin 35 0 0.0 0 11 31.4 31.4
## 1 24 68.6 100.0
##
## Tacrolimus 35 0 0.0 0 0 0.0 0.0
## 1 35 100.0 100.0
##
## Cefepime 35 0 0.0 0 21 60.0 60.0
## 1 14 40.0 100.0
##
## Metronidazole 35 0 0.0 0 15 42.9 42.9
## 1 20 57.1 100.0
##
## Piperacillin/Tazobactam 35 0 0.0 0 7 20.0 20.0
## 1 28 80.0 100.0
##
## Rifaximin 35 0 0.0 0 14 40.0 40.0
## 1 21 60.0 100.0
##
## Ceftriaxone 35 0 0.0 0 26 74.3 74.3
## 1 9 25.7 100.0
##
## Ciprofloxacin 35 0 0.0 0 27 77.1 77.1
## 1 8 22.9 100.0
##
## Gentamicin 35 0 0.0 0 34 97.1 97.1
## 1 1 2.9 100.0
##
## Tobramicin 35 0 0.0 0 25 71.4 71.4
## 1 10 28.6 100.0
##
## Daptomycin 35 0 0.0 0 33 94.3 94.3
## 1 2 5.7 100.0
##
## Meropenem 35 0 0.0 0 34 97.1 97.1
## 1 1 2.9 100.0
##
## Oral Vancomycin 35 0 0.0 0 34 97.1 97.1
## 1 1 2.9 100.0
##
##
## p-values
## pApprox pExact
## Basiliximab 0.942780159 0.839501517
## Mycophenolate 0.647386484 0.617315706
## Steroid NA NA
## Systemic Vancomycin 0.411089474 0.326510288
## Tacrolimus 1.000000000 1.000000000
## Cefepime 1.000000000 1.000000000
## Metronidazole 0.331418773 0.330128924
## Piperacillin/Tazobactam 0.467698201 0.409791463
## Rifaximin 0.378233199 0.332115186
## Ceftriaxone 1.000000000 1.000000000
## Ciprofloxacin 0.769554057 0.627387810
## Gentamicin 1.000000000 1.000000000
## Tobramicin 0.001447514 0.001400685
## Daptomycin 0.983684356 0.636320472
## Meropenem 1.000000000 1.000000000
## Oral Vancomycin 1.000000000 1.000000000
##
## Standardize mean differences
## 1 vs 2
## Basiliximab 0.053468977
## Mycophenolate 0.142680721
## Steroid 0.000000000
## Systemic Vancomycin 0.202634683
## Tacrolimus 0.137360564
## Cefepime 0.015286339
## Metronidazole 0.228545001
## Piperacillin/Tazobactam 0.186942613
## Rifaximin 0.211475899
## Ceftriaxone 0.032684568
## Ciprofloxacin 0.102813256
## Gentamicin 0.065076934
## Tobramicin 0.604939677
## Daptomycin 0.093218882
## Meropenem 0.095499590
## Oral Vancomycin 0.003220228
## Stratified by cohort
## Original Validation p test
## n 107 35
## Basiliximab = 1 (%) 70 ( 65.4) 22 ( 62.9) 0.943
## Mycophenolate = 1 (%) 86 ( 80.4) 30 ( 85.7) 0.647
## Steroid = 1 (%) 107 (100.0) 35 (100.0) NA
## Systemic Vancomycin = 1 (%) 63 ( 58.9) 24 ( 68.6) 0.411
## Tacrolimus = 1 (%) 106 ( 99.1) 35 (100.0) 1.000
## Cefepime = 1 (%) 42 ( 39.3) 14 ( 40.0) 1.000
## Metronidazole = 1 (%) 49 ( 45.8) 20 ( 57.1) 0.331
## Piperacillin/Tazobactam = 1 (%) 93 ( 86.9) 28 ( 80.0) 0.468
## Rifaximin = 1 (%) 53 ( 49.5) 21 ( 60.0) 0.378
## Ceftriaxone = 1 (%) 26 ( 24.3) 9 ( 25.7) 1.000
## Ciprofloxacin = 1 (%) 20 ( 18.7) 8 ( 22.9) 0.770
## Gentamicin = 1 (%) 2 ( 1.9) 1 ( 2.9) 1.000
## Tobramicin = 1 (%) 7 ( 6.5) 10 ( 28.6) 0.001
## Daptomycin = 1 (%) 4 ( 3.7) 2 ( 5.7) 0.984
## Meropenem = 1 (%) 5 ( 4.7) 1 ( 2.9) 1.000
## Oral Vancomycin = 1 (%) 3 ( 2.8) 1 ( 2.9) 1.000
# Export the two-cohort medication table. Saving and then re-reading the same
# data is an easy way to adjust the p-values, because read.csv loads the
# object back as a data frame.
# NOTE(review): `cohorts_abx_tab1_2` is not assigned in the visible part of
# this script; presumably it is the value returned by printing
# `cohorts_abx_tab1_1` -- confirm it exists before this line runs.
write.csv(cohorts_abx_tab1_2, "./Results/Both_Cohorts_ABX_Table_1.csv",
  row.names = TRUE)

# The p-values need to be adjusted and the rows put back in their original
# order, hence the multiple data frames below.

# Step 1: re-import the table; the row-label column arrives as `X` and is
# renamed to a blank header.
cohorts_abx_tab1_2_padjust1 <- read.csv("./Results/Both_Cohorts_ABX_Table_1.csv") %>%
  dplyr::rename(` ` = X)

# Step 2: turn the row labels into a factor whose levels follow the file
# order; step 3 uses it to restore that order after the grouped adjustment.
cohorts_abx_tab1_2_padjust2 <- cohorts_abx_tab1_2_padjust1 %>%
  mutate(` ` = factor(` `, levels = cohorts_abx_tab1_2_padjust1$` `))

# Step 3: label the test, adjust the p-values (method "BH") within each test
# label, re-sort the rows and blank out missing values for display.
# Rows that carry a p-value but no test label are tagged "chi.sq". A blank
# label can be imported either as NA or as "" depending on how read.csv types
# the column, so both are treated as blank; any existing label is kept.
cohorts_abx_tab1_2_padjust3 <- cohorts_abx_tab1_2_padjust1 %>%
  mutate(test = dplyr::case_when(
    !is.na(p) & (is.na(test) | test == "") ~ "chi.sq",
    is.na(test) ~ "",
    TRUE ~ as.character(test)
  )) %>%
  group_by(test) %>%
  rstatix::adjust_pvalue(p.col = "p", method = "BH") %>%
  ungroup() %>%
  mutate(` ` = factor(` `, cohorts_abx_tab1_2_padjust2$` `)) %>%
  arrange(` `) %>%
  mutate(p = ifelse(is.na(p), "", p), p.adj = ifelse(is.na(p.adj),
    "", p.adj))

# Write the table again, now with the adjusted p-values appended
write.csv(cohorts_abx_tab1_2_padjust3, "./Results/Both_Cohorts_ABX_Table_1_padjust.csv",
  row.names = FALSE)